VoCuc commited on
Commit
ea79b7f
·
verified ·
1 Parent(s): a80148e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. logs/distillm_2_qwen2.5_1.5b_it.log +0 -0
  3. logs/llama3.2_3b_it_nnm_distillm_2_1e.log +0 -0
  4. logs/nnm_distillm_2_qwen2.5_1.5b.log +1 -0
  5. qwen2.5-1.5B-it-distillm2/README.md +62 -0
  6. qwen2.5-1.5B-it-distillm2/added_tokens.json +24 -0
  7. qwen2.5-1.5B-it-distillm2/all_results.json +18 -0
  8. qwen2.5-1.5B-it-distillm2/checkpoint-1246/added_tokens.json +24 -0
  9. qwen2.5-1.5B-it-distillm2/checkpoint-1246/config.json +29 -0
  10. qwen2.5-1.5B-it-distillm2/checkpoint-1246/generation_config.json +14 -0
  11. qwen2.5-1.5B-it-distillm2/checkpoint-1246/merges.txt +0 -0
  12. qwen2.5-1.5B-it-distillm2/checkpoint-1246/pytorch_model.bin +3 -0
  13. qwen2.5-1.5B-it-distillm2/checkpoint-1246/special_tokens_map.json +32 -0
  14. qwen2.5-1.5B-it-distillm2/checkpoint-1246/tokenizer.json +3 -0
  15. qwen2.5-1.5B-it-distillm2/checkpoint-1246/tokenizer_config.json +207 -0
  16. qwen2.5-1.5B-it-distillm2/checkpoint-1246/trainer_state.json +1408 -0
  17. qwen2.5-1.5B-it-distillm2/checkpoint-1246/training_args.bin +3 -0
  18. qwen2.5-1.5B-it-distillm2/checkpoint-1246/vocab.json +0 -0
  19. qwen2.5-1.5B-it-distillm2/checkpoint-1869/added_tokens.json +24 -0
  20. qwen2.5-1.5B-it-distillm2/checkpoint-1869/config.json +29 -0
  21. qwen2.5-1.5B-it-distillm2/checkpoint-1869/generation_config.json +14 -0
  22. qwen2.5-1.5B-it-distillm2/checkpoint-1869/merges.txt +0 -0
  23. qwen2.5-1.5B-it-distillm2/checkpoint-1869/pytorch_model.bin +3 -0
  24. qwen2.5-1.5B-it-distillm2/checkpoint-1869/special_tokens_map.json +32 -0
  25. qwen2.5-1.5B-it-distillm2/checkpoint-1869/tokenizer.json +3 -0
  26. qwen2.5-1.5B-it-distillm2/checkpoint-1869/tokenizer_config.json +207 -0
  27. qwen2.5-1.5B-it-distillm2/checkpoint-1869/trainer_state.json +2090 -0
  28. qwen2.5-1.5B-it-distillm2/checkpoint-1869/training_args.bin +3 -0
  29. qwen2.5-1.5B-it-distillm2/checkpoint-1869/vocab.json +0 -0
  30. qwen2.5-1.5B-it-distillm2/checkpoint-623/added_tokens.json +24 -0
  31. qwen2.5-1.5B-it-distillm2/checkpoint-623/config.json +29 -0
  32. qwen2.5-1.5B-it-distillm2/checkpoint-623/generation_config.json +14 -0
  33. qwen2.5-1.5B-it-distillm2/checkpoint-623/merges.txt +0 -0
  34. qwen2.5-1.5B-it-distillm2/checkpoint-623/pytorch_model.bin +3 -0
  35. qwen2.5-1.5B-it-distillm2/checkpoint-623/special_tokens_map.json +32 -0
  36. qwen2.5-1.5B-it-distillm2/checkpoint-623/tokenizer.json +3 -0
  37. qwen2.5-1.5B-it-distillm2/checkpoint-623/tokenizer_config.json +207 -0
  38. qwen2.5-1.5B-it-distillm2/checkpoint-623/trainer_state.json +726 -0
  39. qwen2.5-1.5B-it-distillm2/checkpoint-623/training_args.bin +3 -0
  40. qwen2.5-1.5B-it-distillm2/checkpoint-623/vocab.json +0 -0
  41. qwen2.5-1.5B-it-distillm2/config.json +29 -0
  42. qwen2.5-1.5B-it-distillm2/eval_results.json +12 -0
  43. qwen2.5-1.5B-it-distillm2/generation_config.json +14 -0
  44. qwen2.5-1.5B-it-distillm2/merges.txt +0 -0
  45. qwen2.5-1.5B-it-distillm2/pytorch_model.bin +3 -0
  46. qwen2.5-1.5B-it-distillm2/special_tokens_map.json +32 -0
  47. qwen2.5-1.5B-it-distillm2/tokenizer.json +3 -0
  48. qwen2.5-1.5B-it-distillm2/tokenizer_config.json +207 -0
  49. qwen2.5-1.5B-it-distillm2/train_results.json +9 -0
  50. qwen2.5-1.5B-it-distillm2/trainer_state.json +2099 -0
.gitattributes CHANGED
@@ -49,3 +49,7 @@ llama-3.2-3B-it-distillm2/checkpoint-1869/tokenizer.json filter=lfs diff=lfs mer
49
  llama-3.2-3B-it-distillm2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
  qwen2.5-1.5B-it-distillm2-1epoch/checkpoint-2492/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
  qwen2.5-1.5B-it-distillm2-1epoch/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
49
  llama-3.2-3B-it-distillm2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
  qwen2.5-1.5B-it-distillm2-1epoch/checkpoint-2492/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
  qwen2.5-1.5B-it-distillm2-1epoch/tokenizer.json filter=lfs diff=lfs merge=lfs -text
52
+ qwen2.5-1.5B-it-distillm2/checkpoint-1246/tokenizer.json filter=lfs diff=lfs merge=lfs -text
53
+ qwen2.5-1.5B-it-distillm2/checkpoint-1869/tokenizer.json filter=lfs diff=lfs merge=lfs -text
54
+ qwen2.5-1.5B-it-distillm2/checkpoint-623/tokenizer.json filter=lfs diff=lfs merge=lfs -text
55
+ qwen2.5-1.5B-it-distillm2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
logs/distillm_2_qwen2.5_1.5b_it.log CHANGED
The diff for this file is too large to render. See raw diff
 
logs/llama3.2_3b_it_nnm_distillm_2_1e.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/nnm_distillm_2_qwen2.5_1.5b.log ADDED
@@ -0,0 +1 @@
 
 
1
+ ./scripts/train/qwen2.5-1.5B-it/nnm_distillm_2.sh: line 1: [GPU: command not found
qwen2.5-1.5B-it-distillm2/README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: Qwen/Qwen2.5-1.5B-Instruct
5
+ tags:
6
+ - alignment-handbook
7
+ - generated_from_trainer
8
+ datasets:
9
+ - data/reformatted/distill-qwen2.5-14B-Instruct
10
+ model-index:
11
+ - name: qwen2.5-1.5B-it-distillm2
12
+ results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # qwen2.5-1.5B-it-distillm2
19
+
20
+ This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the data/reformatted/distill-qwen2.5-14B-Instruct dataset.
21
+
22
+ ## Model description
23
+
24
+ More information needed
25
+
26
+ ## Intended uses & limitations
27
+
28
+ More information needed
29
+
30
+ ## Training and evaluation data
31
+
32
+ More information needed
33
+
34
+ ## Training procedure
35
+
36
+ ### Training hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+ - learning_rate: 5e-05
40
+ - train_batch_size: 2
41
+ - eval_batch_size: 2
42
+ - seed: 42
43
+ - distributed_type: multi-GPU
44
+ - num_devices: 2
45
+ - gradient_accumulation_steps: 32
46
+ - total_train_batch_size: 128
47
+ - total_eval_batch_size: 4
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: cosine
50
+ - lr_scheduler_warmup_ratio: 0.1
51
+ - num_epochs: 3
52
+
53
+ ### Training results
54
+
55
+
56
+
57
+ ### Framework versions
58
+
59
+ - Transformers 4.45.2
60
+ - Pytorch 2.5.1+cu124
61
+ - Datasets 4.8.5
62
+ - Tokenizers 0.20.3
qwen2.5-1.5B-it-distillm2/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
qwen2.5-1.5B-it-distillm2/all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.999699067108035,
3
+ "eval_logps_logqs/chosen": 0.07430665194988251,
4
+ "eval_logqs/chosen": -1.0447684526443481,
5
+ "eval_logqs/rejected": -1.0230671167373657,
6
+ "eval_logqs_logps/rejected": 0.034393880516290665,
7
+ "eval_loss": 0.17647793889045715,
8
+ "eval_runtime": 1.0546,
9
+ "eval_samples": 10,
10
+ "eval_samples_per_second": 9.482,
11
+ "eval_steps_per_second": 2.845,
12
+ "total_flos": 0.0,
13
+ "train_loss": 0.194109176269121,
14
+ "train_runtime": 23250.2173,
15
+ "train_samples": 79751,
16
+ "train_samples_per_second": 10.29,
17
+ "train_steps_per_second": 0.08
18
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1246/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1246/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.45.2",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 152064
29
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1246/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.45.2"
14
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1246/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5-1.5B-it-distillm2/checkpoint-1246/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:422eaf13971cf4a432e2e4e7b1e5fde87893019bfdf69e70e1805173f6af08e7
3
+ size 3087935634
qwen2.5-1.5B-it-distillm2/checkpoint-1246/special_tokens_map.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "bos_token": "<|endoftext|>",
18
+ "eos_token": {
19
+ "content": "<|im_end|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<|endoftext|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ }
32
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1246/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
qwen2.5-1.5B-it-distillm2/checkpoint-1246/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": "<|endoftext|>",
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 2048,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1246/trainer_state.json ADDED
@@ -0,0 +1,1408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.9997993780720233,
5
+ "eval_steps": 10000000,
6
+ "global_step": 1246,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0016049754238138228,
13
+ "grad_norm": 7.15625,
14
+ "learning_rate": 2.6737967914438503e-07,
15
+ "logps_logqs/chosen": 0.057332463562488556,
16
+ "logqs/chosen": -0.752794086933136,
17
+ "logqs/rejected": -0.7512239813804626,
18
+ "logqs_logps/rejected": 0.1236220970749855,
19
+ "loss": 0.4632,
20
+ "step": 1
21
+ },
22
+ {
23
+ "epoch": 0.016049754238138227,
24
+ "grad_norm": 5.71875,
25
+ "learning_rate": 2.6737967914438504e-06,
26
+ "logps_logqs/chosen": 0.047926291823387146,
27
+ "logqs/chosen": -0.7439535856246948,
28
+ "logqs/rejected": -0.7035253643989563,
29
+ "logqs_logps/rejected": 0.13991409540176392,
30
+ "loss": 0.4557,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.032099508476276455,
35
+ "grad_norm": 1.9296875,
36
+ "learning_rate": 5.347593582887701e-06,
37
+ "logps_logqs/chosen": 0.03161158040165901,
38
+ "logqs/chosen": -0.6941366195678711,
39
+ "logqs/rejected": -0.6452130079269409,
40
+ "logqs_logps/rejected": 0.1449585109949112,
41
+ "loss": 0.4015,
42
+ "step": 20
43
+ },
44
+ {
45
+ "epoch": 0.048149262714414685,
46
+ "grad_norm": 1.8515625,
47
+ "learning_rate": 8.02139037433155e-06,
48
+ "logps_logqs/chosen": 0.03457744047045708,
49
+ "logqs/chosen": -0.6867337226867676,
50
+ "logqs/rejected": -0.6923023462295532,
51
+ "logqs_logps/rejected": 0.12560859322547913,
52
+ "loss": 0.3565,
53
+ "step": 30
54
+ },
55
+ {
56
+ "epoch": 0.06419901695255291,
57
+ "grad_norm": 1.234375,
58
+ "learning_rate": 1.0695187165775402e-05,
59
+ "logps_logqs/chosen": 0.05277745798230171,
60
+ "logqs/chosen": -0.7405093908309937,
61
+ "logqs/rejected": -0.7894801497459412,
62
+ "logqs_logps/rejected": 0.0936320573091507,
63
+ "loss": 0.3228,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 0.08024877119069114,
68
+ "grad_norm": 1.0,
69
+ "learning_rate": 1.3368983957219252e-05,
70
+ "logps_logqs/chosen": 0.04658779874444008,
71
+ "logqs/chosen": -0.7268352508544922,
72
+ "logqs/rejected": -0.7926934361457825,
73
+ "logqs_logps/rejected": 0.09889288991689682,
74
+ "loss": 0.3027,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 0.09629852542882937,
79
+ "grad_norm": 0.92578125,
80
+ "learning_rate": 1.60427807486631e-05,
81
+ "logps_logqs/chosen": 0.04193533584475517,
82
+ "logqs/chosen": -0.7151986956596375,
83
+ "logqs/rejected": -0.7687807083129883,
84
+ "logqs_logps/rejected": 0.09696364402770996,
85
+ "loss": 0.2856,
86
+ "step": 60
87
+ },
88
+ {
89
+ "epoch": 0.1123482796669676,
90
+ "grad_norm": 1.0625,
91
+ "learning_rate": 1.8716577540106954e-05,
92
+ "logps_logqs/chosen": 0.04730736091732979,
93
+ "logqs/chosen": -0.7385339736938477,
94
+ "logqs/rejected": -0.8247607946395874,
95
+ "logqs_logps/rejected": 0.08417234569787979,
96
+ "loss": 0.285,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 0.12839803390510582,
101
+ "grad_norm": 0.890625,
102
+ "learning_rate": 2.1390374331550803e-05,
103
+ "logps_logqs/chosen": 0.047996845096349716,
104
+ "logqs/chosen": -0.7431933879852295,
105
+ "logqs/rejected": -0.8130408525466919,
106
+ "logqs_logps/rejected": 0.08468352258205414,
107
+ "loss": 0.2797,
108
+ "step": 80
109
+ },
110
+ {
111
+ "epoch": 0.14444778814324405,
112
+ "grad_norm": 0.79296875,
113
+ "learning_rate": 2.4064171122994652e-05,
114
+ "logps_logqs/chosen": 0.04765843600034714,
115
+ "logqs/chosen": -0.7173298001289368,
116
+ "logqs/rejected": -0.7709535360336304,
117
+ "logqs_logps/rejected": 0.08819471299648285,
118
+ "loss": 0.2705,
119
+ "step": 90
120
+ },
121
+ {
122
+ "epoch": 0.16049754238138228,
123
+ "grad_norm": 0.82421875,
124
+ "learning_rate": 2.6737967914438505e-05,
125
+ "logps_logqs/chosen": 0.040474437177181244,
126
+ "logqs/chosen": -0.710905909538269,
127
+ "logqs/rejected": -0.7959136962890625,
128
+ "logqs_logps/rejected": 0.09612198173999786,
129
+ "loss": 0.2683,
130
+ "step": 100
131
+ },
132
+ {
133
+ "epoch": 0.1765472966195205,
134
+ "grad_norm": 0.7109375,
135
+ "learning_rate": 2.9411764705882354e-05,
136
+ "logps_logqs/chosen": 0.020066609606146812,
137
+ "logqs/chosen": -0.6621483564376831,
138
+ "logqs/rejected": -0.7314690351486206,
139
+ "logqs_logps/rejected": 0.11639375984668732,
140
+ "loss": 0.2682,
141
+ "step": 110
142
+ },
143
+ {
144
+ "epoch": 0.19259705085765874,
145
+ "grad_norm": 0.9921875,
146
+ "learning_rate": 3.20855614973262e-05,
147
+ "logps_logqs/chosen": 0.017146889120340347,
148
+ "logqs/chosen": -0.6599355340003967,
149
+ "logqs/rejected": -0.7124370336532593,
150
+ "logqs_logps/rejected": 0.12015531212091446,
151
+ "loss": 0.268,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.20864680509579697,
156
+ "grad_norm": 1.78125,
157
+ "learning_rate": 3.4759358288770055e-05,
158
+ "logps_logqs/chosen": 0.0369986966252327,
159
+ "logqs/chosen": -0.7184829115867615,
160
+ "logqs/rejected": -0.8139392137527466,
161
+ "logqs_logps/rejected": 0.09705157577991486,
162
+ "loss": 0.2611,
163
+ "step": 130
164
+ },
165
+ {
166
+ "epoch": 0.2246965593339352,
167
+ "grad_norm": 0.87890625,
168
+ "learning_rate": 3.743315508021391e-05,
169
+ "logps_logqs/chosen": 0.07482504099607468,
170
+ "logqs/chosen": -0.7948209643363953,
171
+ "logqs/rejected": -0.9149974584579468,
172
+ "logqs_logps/rejected": 0.04995354264974594,
173
+ "loss": 0.2442,
174
+ "step": 140
175
+ },
176
+ {
177
+ "epoch": 0.24074631357207343,
178
+ "grad_norm": 0.796875,
179
+ "learning_rate": 4.0106951871657754e-05,
180
+ "logps_logqs/chosen": 0.08310296386480331,
181
+ "logqs/chosen": -0.812109649181366,
182
+ "logqs/rejected": -0.9168623089790344,
183
+ "logqs_logps/rejected": 0.041201137006282806,
184
+ "loss": 0.2403,
185
+ "step": 150
186
+ },
187
+ {
188
+ "epoch": 0.25679606781021164,
189
+ "grad_norm": 0.70703125,
190
+ "learning_rate": 4.2780748663101606e-05,
191
+ "logps_logqs/chosen": 0.08498374372720718,
192
+ "logqs/chosen": -0.8101779818534851,
193
+ "logqs/rejected": -0.9302076101303101,
194
+ "logqs_logps/rejected": 0.03671664744615555,
195
+ "loss": 0.2336,
196
+ "step": 160
197
+ },
198
+ {
199
+ "epoch": 0.2728458220483499,
200
+ "grad_norm": 0.85546875,
201
+ "learning_rate": 4.545454545454546e-05,
202
+ "logps_logqs/chosen": 0.08320538699626923,
203
+ "logqs/chosen": -0.8246332406997681,
204
+ "logqs/rejected": -0.9219290018081665,
205
+ "logqs_logps/rejected": 0.03701116889715195,
206
+ "loss": 0.2386,
207
+ "step": 170
208
+ },
209
+ {
210
+ "epoch": 0.2888955762864881,
211
+ "grad_norm": 0.77734375,
212
+ "learning_rate": 4.8128342245989304e-05,
213
+ "logps_logqs/chosen": 0.08179891854524612,
214
+ "logqs/chosen": -0.787845253944397,
215
+ "logqs/rejected": -0.9030144810676575,
216
+ "logqs_logps/rejected": 0.03777966648340225,
217
+ "loss": 0.2315,
218
+ "step": 180
219
+ },
220
+ {
221
+ "epoch": 0.30494533052462636,
222
+ "grad_norm": 0.7734375,
223
+ "learning_rate": 4.9999607536612036e-05,
224
+ "logps_logqs/chosen": 0.08169040083885193,
225
+ "logqs/chosen": -0.8232030868530273,
226
+ "logqs/rejected": -0.9288152456283569,
227
+ "logqs_logps/rejected": 0.039177440106868744,
228
+ "loss": 0.2389,
229
+ "step": 190
230
+ },
231
+ {
232
+ "epoch": 0.32099508476276456,
233
+ "grad_norm": 0.72265625,
234
+ "learning_rate": 4.9992630752499945e-05,
235
+ "logps_logqs/chosen": 0.08127471804618835,
236
+ "logqs/chosen": -0.8097829818725586,
237
+ "logqs/rejected": -0.9110867381095886,
238
+ "logqs_logps/rejected": 0.03667169809341431,
239
+ "loss": 0.2279,
240
+ "step": 200
241
+ },
242
+ {
243
+ "epoch": 0.3370448390009028,
244
+ "grad_norm": 0.71875,
245
+ "learning_rate": 4.997693536122969e-05,
246
+ "logps_logqs/chosen": 0.0788678377866745,
247
+ "logqs/chosen": -0.7994121313095093,
248
+ "logqs/rejected": -0.9303783178329468,
249
+ "logqs_logps/rejected": 0.037957318127155304,
250
+ "loss": 0.2291,
251
+ "step": 210
252
+ },
253
+ {
254
+ "epoch": 0.353094593239041,
255
+ "grad_norm": 0.7109375,
256
+ "learning_rate": 4.995252683809324e-05,
257
+ "logps_logqs/chosen": 0.08248866349458694,
258
+ "logqs/chosen": -0.8128089904785156,
259
+ "logqs/rejected": -0.9362344741821289,
260
+ "logqs_logps/rejected": 0.037949927151203156,
261
+ "loss": 0.229,
262
+ "step": 220
263
+ },
264
+ {
265
+ "epoch": 0.3691443474771793,
266
+ "grad_norm": 0.63671875,
267
+ "learning_rate": 4.9919413697933496e-05,
268
+ "logps_logqs/chosen": 0.0817771628499031,
269
+ "logqs/chosen": -0.7939696311950684,
270
+ "logqs/rejected": -0.9058554768562317,
271
+ "logqs_logps/rejected": 0.04094362258911133,
272
+ "loss": 0.2314,
273
+ "step": 230
274
+ },
275
+ {
276
+ "epoch": 0.3851941017153175,
277
+ "grad_norm": 0.71875,
278
+ "learning_rate": 4.987760749217389e-05,
279
+ "logps_logqs/chosen": 0.076979860663414,
280
+ "logqs/chosen": -0.7874332070350647,
281
+ "logqs/rejected": -0.9303415417671204,
282
+ "logqs_logps/rejected": 0.04022833704948425,
283
+ "loss": 0.2244,
284
+ "step": 240
285
+ },
286
+ {
287
+ "epoch": 0.4012438559534557,
288
+ "grad_norm": 0.83984375,
289
+ "learning_rate": 4.982712280478875e-05,
290
+ "logps_logqs/chosen": 0.07487889379262924,
291
+ "logqs/chosen": -0.7985113859176636,
292
+ "logqs/rejected": -0.9595896005630493,
293
+ "logqs_logps/rejected": 0.04083302244544029,
294
+ "loss": 0.2277,
295
+ "step": 250
296
+ },
297
+ {
298
+ "epoch": 0.41729361019159394,
299
+ "grad_norm": 0.6953125,
300
+ "learning_rate": 4.976797724721567e-05,
301
+ "logps_logqs/chosen": 0.07195514440536499,
302
+ "logqs/chosen": -0.787926971912384,
303
+ "logqs/rejected": -0.897616982460022,
304
+ "logqs_logps/rejected": 0.0396430678665638,
305
+ "loss": 0.2163,
306
+ "step": 260
307
+ },
308
+ {
309
+ "epoch": 0.43334336442973215,
310
+ "grad_norm": 0.7734375,
311
+ "learning_rate": 4.9700191452211806e-05,
312
+ "logps_logqs/chosen": 0.07640247046947479,
313
+ "logqs/chosen": -0.7960728406906128,
314
+ "logqs/rejected": -0.928481936454773,
315
+ "logqs_logps/rejected": 0.03928074985742569,
316
+ "loss": 0.2226,
317
+ "step": 270
318
+ },
319
+ {
320
+ "epoch": 0.4493931186678704,
321
+ "grad_norm": 0.72265625,
322
+ "learning_rate": 4.9623789066656276e-05,
323
+ "logps_logqs/chosen": 0.07622957229614258,
324
+ "logqs/chosen": -0.8077980279922485,
325
+ "logqs/rejected": -0.9034830927848816,
326
+ "logqs_logps/rejected": 0.046734608709812164,
327
+ "loss": 0.2256,
328
+ "step": 280
329
+ },
330
+ {
331
+ "epoch": 0.4654428729060086,
332
+ "grad_norm": 0.78515625,
333
+ "learning_rate": 4.953879674330093e-05,
334
+ "logps_logqs/chosen": 0.07244043052196503,
335
+ "logqs/chosen": -0.7900176644325256,
336
+ "logqs/rejected": -0.891532301902771,
337
+ "logqs_logps/rejected": 0.04387632757425308,
338
+ "loss": 0.217,
339
+ "step": 290
340
+ },
341
+ {
342
+ "epoch": 0.48149262714414687,
343
+ "grad_norm": 0.6796875,
344
+ "learning_rate": 4.944524413147263e-05,
345
+ "logps_logqs/chosen": 0.07289232313632965,
346
+ "logqs/chosen": -0.7856311798095703,
347
+ "logqs/rejected": -0.9099383354187012,
348
+ "logqs_logps/rejected": 0.03974480181932449,
349
+ "loss": 0.2173,
350
+ "step": 300
351
+ },
352
+ {
353
+ "epoch": 0.49754238138228507,
354
+ "grad_norm": 0.66796875,
355
+ "learning_rate": 4.934316386673022e-05,
356
+ "logps_logqs/chosen": 0.07063151150941849,
357
+ "logqs/chosen": -0.7815223932266235,
358
+ "logqs/rejected": -0.9079385995864868,
359
+ "logqs_logps/rejected": 0.041664548218250275,
360
+ "loss": 0.2126,
361
+ "step": 310
362
+ },
363
+ {
364
+ "epoch": 0.5135921356204233,
365
+ "grad_norm": 0.61328125,
366
+ "learning_rate": 4.923259155947964e-05,
367
+ "logps_logqs/chosen": 0.071230448782444,
368
+ "logqs/chosen": -0.8096257448196411,
369
+ "logqs/rejected": -0.9752206802368164,
370
+ "logqs_logps/rejected": 0.03939716890454292,
371
+ "loss": 0.2194,
372
+ "step": 320
373
+ },
374
+ {
375
+ "epoch": 0.5296418898585615,
376
+ "grad_norm": 0.6640625,
377
+ "learning_rate": 4.911356578255139e-05,
378
+ "logps_logqs/chosen": 0.07356056571006775,
379
+ "logqs/chosen": -0.7827819585800171,
380
+ "logqs/rejected": -0.9489104151725769,
381
+ "logqs_logps/rejected": 0.040141116827726364,
382
+ "loss": 0.2145,
383
+ "step": 330
384
+ },
385
+ {
386
+ "epoch": 0.5456916440966998,
387
+ "grad_norm": 0.7109375,
388
+ "learning_rate": 4.898612805774447e-05,
389
+ "logps_logqs/chosen": 0.07881536334753036,
390
+ "logqs/chosen": -0.8095147013664246,
391
+ "logqs/rejected": -0.9313008189201355,
392
+ "logqs_logps/rejected": 0.03702981770038605,
393
+ "loss": 0.2165,
394
+ "step": 340
395
+ },
396
+ {
397
+ "epoch": 0.561741398334838,
398
+ "grad_norm": 0.64453125,
399
+ "learning_rate": 4.885032284134165e-05,
400
+ "logps_logqs/chosen": 0.07632436603307724,
401
+ "logqs/chosen": -0.7979615926742554,
402
+ "logqs/rejected": -0.9368084073066711,
403
+ "logqs_logps/rejected": 0.03654230386018753,
404
+ "loss": 0.2113,
405
+ "step": 350
406
+ },
407
+ {
408
+ "epoch": 0.5777911525729762,
409
+ "grad_norm": 0.62109375,
410
+ "learning_rate": 4.8706197508600984e-05,
411
+ "logps_logqs/chosen": 0.07344283908605576,
412
+ "logqs/chosen": -0.8101444244384766,
413
+ "logqs/rejected": -0.9295485615730286,
414
+ "logqs_logps/rejected": 0.03901313990354538,
415
+ "loss": 0.2151,
416
+ "step": 360
417
+ },
418
+ {
419
+ "epoch": 0.5938409068111145,
420
+ "grad_norm": 0.62890625,
421
+ "learning_rate": 4.855380233722915e-05,
422
+ "logps_logqs/chosen": 0.07421533018350601,
423
+ "logqs/chosen": -0.7826108336448669,
424
+ "logqs/rejected": -0.9229670763015747,
425
+ "logqs_logps/rejected": 0.038314513862133026,
426
+ "loss": 0.2129,
427
+ "step": 370
428
+ },
429
+ {
430
+ "epoch": 0.6098906610492527,
431
+ "grad_norm": 0.62890625,
432
+ "learning_rate": 4.839319048984217e-05,
433
+ "logps_logqs/chosen": 0.07273373752832413,
434
+ "logqs/chosen": -0.7798442840576172,
435
+ "logqs/rejected": -0.9371780157089233,
436
+ "logqs_logps/rejected": 0.03713950887322426,
437
+ "loss": 0.2067,
438
+ "step": 380
439
+ },
440
+ {
441
+ "epoch": 0.6259404152873909,
442
+ "grad_norm": 0.62890625,
443
+ "learning_rate": 4.822441799541979e-05,
444
+ "logps_logqs/chosen": 0.07329441606998444,
445
+ "logqs/chosen": -0.8102380037307739,
446
+ "logqs/rejected": -0.95549476146698,
447
+ "logqs_logps/rejected": 0.036079905927181244,
448
+ "loss": 0.2158,
449
+ "step": 390
450
+ },
451
+ {
452
+ "epoch": 0.6419901695255291,
453
+ "grad_norm": 0.66796875,
454
+ "learning_rate": 4.8047543729759936e-05,
455
+ "logps_logqs/chosen": 0.07404083013534546,
456
+ "logqs/chosen": -0.7847949266433716,
457
+ "logqs/rejected": -0.9250528216362,
458
+ "logqs_logps/rejected": 0.036556728184223175,
459
+ "loss": 0.2105,
460
+ "step": 400
461
+ },
462
+ {
463
+ "epoch": 0.6580399237636674,
464
+ "grad_norm": 0.69140625,
465
+ "learning_rate": 4.786262939494007e-05,
466
+ "logps_logqs/chosen": 0.07539906352758408,
467
+ "logqs/chosen": -0.8072575330734253,
468
+ "logqs/rejected": -0.9661371111869812,
469
+ "logqs_logps/rejected": 0.03662776201963425,
470
+ "loss": 0.2154,
471
+ "step": 410
472
+ },
473
+ {
474
+ "epoch": 0.6740896780018056,
475
+ "grad_norm": 0.66015625,
476
+ "learning_rate": 4.766973949779261e-05,
477
+ "logps_logqs/chosen": 0.0744672566652298,
478
+ "logqs/chosen": -0.787712574005127,
479
+ "logqs/rejected": -0.9350829124450684,
480
+ "logqs_logps/rejected": 0.0353250689804554,
481
+ "loss": 0.2074,
482
+ "step": 420
483
+ },
484
+ {
485
+ "epoch": 0.6901394322399438,
486
+ "grad_norm": 0.62890625,
487
+ "learning_rate": 4.746894132740186e-05,
488
+ "logps_logqs/chosen": 0.07364196330308914,
489
+ "logqs/chosen": -0.7813644409179688,
490
+ "logqs/rejected": -0.9208289980888367,
491
+ "logqs_logps/rejected": 0.0356430858373642,
492
+ "loss": 0.2053,
493
+ "step": 430
494
+ },
495
+ {
496
+ "epoch": 0.706189186478082,
497
+ "grad_norm": 0.56640625,
498
+ "learning_rate": 4.726030493163044e-05,
499
+ "logps_logqs/chosen": 0.07587061077356339,
500
+ "logqs/chosen": -0.7853146195411682,
501
+ "logqs/rejected": -0.8944910168647766,
502
+ "logqs_logps/rejected": 0.03531279042363167,
503
+ "loss": 0.2078,
504
+ "step": 440
505
+ },
506
+ {
507
+ "epoch": 0.7222389407162203,
508
+ "grad_norm": 0.68359375,
509
+ "learning_rate": 4.7043903092683314e-05,
510
+ "logps_logqs/chosen": 0.07814273238182068,
511
+ "logqs/chosen": -0.8133522272109985,
512
+ "logqs/rejected": -0.9508693814277649,
513
+ "logqs_logps/rejected": 0.030778918415308,
514
+ "loss": 0.2104,
515
+ "step": 450
516
+ },
517
+ {
518
+ "epoch": 0.7382886949543586,
519
+ "grad_norm": 0.72265625,
520
+ "learning_rate": 4.6819811301717885e-05,
521
+ "logps_logqs/chosen": 0.07763786613941193,
522
+ "logqs/chosen": -0.8061367869377136,
523
+ "logqs/rejected": -0.9445575475692749,
524
+ "logqs_logps/rejected": 0.035373255610466,
525
+ "loss": 0.2084,
526
+ "step": 460
527
+ },
528
+ {
529
+ "epoch": 0.7543384491924967,
530
+ "grad_norm": 0.59765625,
531
+ "learning_rate": 4.6588107732509134e-05,
532
+ "logps_logqs/chosen": 0.07325359433889389,
533
+ "logqs/chosen": -0.7970572710037231,
534
+ "logqs/rejected": -0.9243482351303101,
535
+ "logqs_logps/rejected": 0.03834759443998337,
536
+ "loss": 0.2088,
537
+ "step": 470
538
+ },
539
+ {
540
+ "epoch": 0.770388203430635,
541
+ "grad_norm": 0.5625,
542
+ "learning_rate": 4.634887321417895e-05,
543
+ "logps_logqs/chosen": 0.0732722282409668,
544
+ "logqs/chosen": -0.7719672918319702,
545
+ "logqs/rejected": -0.9193938970565796,
546
+ "logqs_logps/rejected": 0.03625096380710602,
547
+ "loss": 0.2075,
548
+ "step": 480
549
+ },
550
+ {
551
+ "epoch": 0.7864379576687732,
552
+ "grad_norm": 0.90234375,
553
+ "learning_rate": 4.6102191202999065e-05,
554
+ "logps_logqs/chosen": 0.07290570437908173,
555
+ "logqs/chosen": -0.8092619180679321,
556
+ "logqs/rejected": -0.9642523527145386,
557
+ "logqs_logps/rejected": 0.03311945125460625,
558
+ "loss": 0.2045,
559
+ "step": 490
560
+ },
561
+ {
562
+ "epoch": 0.8024877119069114,
563
+ "grad_norm": 0.6015625,
564
+ "learning_rate": 4.5848147753277656e-05,
565
+ "logps_logqs/chosen": 0.07134760171175003,
566
+ "logqs/chosen": -0.7773372530937195,
567
+ "logqs/rejected": -0.9478441476821899,
568
+ "logqs_logps/rejected": 0.03295496851205826,
569
+ "loss": 0.2049,
570
+ "step": 500
571
+ },
572
+ {
573
+ "epoch": 0.8185374661450496,
574
+ "grad_norm": 0.5859375,
575
+ "learning_rate": 4.5586831487339485e-05,
576
+ "logps_logqs/chosen": 0.07219503819942474,
577
+ "logqs/chosen": -0.7950411438941956,
578
+ "logqs/rejected": -0.9455002546310425,
579
+ "logqs_logps/rejected": 0.03765324503183365,
580
+ "loss": 0.208,
581
+ "step": 510
582
+ },
583
+ {
584
+ "epoch": 0.8345872203831879,
585
+ "grad_norm": 0.62109375,
586
+ "learning_rate": 4.531833356461027e-05,
587
+ "logps_logqs/chosen": 0.06849464029073715,
588
+ "logqs/chosen": -0.7819440364837646,
589
+ "logqs/rejected": -0.9597524404525757,
590
+ "logqs_logps/rejected": 0.04056422412395477,
591
+ "loss": 0.2084,
592
+ "step": 520
593
+ },
594
+ {
595
+ "epoch": 0.8506369746213261,
596
+ "grad_norm": 0.6171875,
597
+ "learning_rate": 4.5042747649816006e-05,
598
+ "logps_logqs/chosen": 0.07160626351833344,
599
+ "logqs/chosen": -0.7721427083015442,
600
+ "logqs/rejected": -0.94866544008255,
601
+ "logqs_logps/rejected": 0.03749927878379822,
602
+ "loss": 0.2068,
603
+ "step": 530
604
+ },
605
+ {
606
+ "epoch": 0.8666867288594643,
607
+ "grad_norm": 0.5546875,
608
+ "learning_rate": 4.476016988030826e-05,
609
+ "logps_logqs/chosen": 0.07214485853910446,
610
+ "logqs/chosen": -0.7874671816825867,
611
+ "logqs/rejected": -0.9504098892211914,
612
+ "logqs_logps/rejected": 0.03507527709007263,
613
+ "loss": 0.2045,
614
+ "step": 540
615
+ },
616
+ {
617
+ "epoch": 0.8827364830976026,
618
+ "grad_norm": 0.54296875,
619
+ "learning_rate": 4.447069883252696e-05,
620
+ "logps_logqs/chosen": 0.07395409047603607,
621
+ "logqs/chosen": -0.771978497505188,
622
+ "logqs/rejected": -0.9129235148429871,
623
+ "logqs_logps/rejected": 0.039593033492565155,
624
+ "loss": 0.205,
625
+ "step": 550
626
+ },
627
+ {
628
+ "epoch": 0.8987862373357408,
629
+ "grad_norm": 0.61328125,
630
+ "learning_rate": 4.417443548761227e-05,
631
+ "logps_logqs/chosen": 0.07299650460481644,
632
+ "logqs/chosen": -0.7979342341423035,
633
+ "logqs/rejected": -0.9357426762580872,
634
+ "logqs_logps/rejected": 0.03937726467847824,
635
+ "loss": 0.2083,
636
+ "step": 560
637
+ },
638
+ {
639
+ "epoch": 0.9148359915738791,
640
+ "grad_norm": 0.57421875,
641
+ "learning_rate": 4.387148319617763e-05,
642
+ "logps_logqs/chosen": 0.06836996972560883,
643
+ "logqs/chosen": -0.7659951448440552,
644
+ "logqs/rejected": -0.8732549548149109,
645
+ "logqs_logps/rejected": 0.039081670343875885,
646
+ "loss": 0.203,
647
+ "step": 570
648
+ },
649
+ {
650
+ "epoch": 0.9308857458120172,
651
+ "grad_norm": 0.57421875,
652
+ "learning_rate": 4.356194764225618e-05,
653
+ "logps_logqs/chosen": 0.07397963851690292,
654
+ "logqs/chosen": -0.7891489863395691,
655
+ "logqs/rejected": -0.9294188618659973,
656
+ "logqs_logps/rejected": 0.03416060656309128,
657
+ "loss": 0.2024,
658
+ "step": 580
659
+ },
660
+ {
661
+ "epoch": 0.9469355000501555,
662
+ "grad_norm": 0.703125,
663
+ "learning_rate": 4.3245936806433205e-05,
664
+ "logps_logqs/chosen": 0.07466734945774078,
665
+ "logqs/chosen": -0.7958794832229614,
666
+ "logqs/rejected": -0.953458309173584,
667
+ "logqs_logps/rejected": 0.035235695540905,
668
+ "loss": 0.206,
669
+ "step": 590
670
+ },
671
+ {
672
+ "epoch": 0.9629852542882937,
673
+ "grad_norm": 0.5859375,
674
+ "learning_rate": 4.292356092817739e-05,
675
+ "logps_logqs/chosen": 0.07136549055576324,
676
+ "logqs/chosen": -0.7849777936935425,
677
+ "logqs/rejected": -0.9327009320259094,
678
+ "logqs_logps/rejected": 0.038332488387823105,
679
+ "loss": 0.2041,
680
+ "step": 600
681
+ },
682
+ {
683
+ "epoch": 0.979035008526432,
684
+ "grad_norm": 0.55859375,
685
+ "learning_rate": 4.259493246738409e-05,
686
+ "logps_logqs/chosen": 0.06976237148046494,
687
+ "logqs/chosen": -0.7658575773239136,
688
+ "logqs/rejected": -0.9027583003044128,
689
+ "logqs_logps/rejected": 0.03464614599943161,
690
+ "loss": 0.1993,
691
+ "step": 610
692
+ },
693
+ {
694
+ "epoch": 0.9950847627645701,
695
+ "grad_norm": 0.55078125,
696
+ "learning_rate": 4.226016606514411e-05,
697
+ "logps_logqs/chosen": 0.0706130638718605,
698
+ "logqs/chosen": -0.7847878932952881,
699
+ "logqs/rejected": -0.9509286880493164,
700
+ "logqs_logps/rejected": 0.03653167933225632,
701
+ "loss": 0.2035,
702
+ "step": 620
703
+ },
704
+ {
705
+ "epoch": 1.0111345170027084,
706
+ "grad_norm": 0.53515625,
707
+ "learning_rate": 4.191937850375136e-05,
708
+ "logps_logqs/chosen": 0.06913185119628906,
709
+ "logqs/chosen": -0.7918249368667603,
710
+ "logqs/rejected": -0.9458308219909668,
711
+ "logqs_logps/rejected": 0.030862990766763687,
712
+ "loss": 0.1886,
713
+ "step": 630
714
+ },
715
+ {
716
+ "epoch": 1.0271842712408465,
717
+ "grad_norm": 0.5625,
718
+ "learning_rate": 4.157268866596381e-05,
719
+ "logps_logqs/chosen": 0.0690259039402008,
720
+ "logqs/chosen": -0.7895857095718384,
721
+ "logqs/rejected": -0.9810531735420227,
722
+ "logqs_logps/rejected": 0.03111192025244236,
723
+ "loss": 0.187,
724
+ "step": 640
725
+ },
726
+ {
727
+ "epoch": 1.043234025478985,
728
+ "grad_norm": 0.53125,
729
+ "learning_rate": 4.1220217493531494e-05,
730
+ "logps_logqs/chosen": 0.06900795549154282,
731
+ "logqs/chosen": -0.7902609705924988,
732
+ "logqs/rejected": -0.9747546911239624,
733
+ "logqs_logps/rejected": 0.031710296869277954,
734
+ "loss": 0.1845,
735
+ "step": 650
736
+ },
737
+ {
738
+ "epoch": 1.059283779717123,
739
+ "grad_norm": 0.515625,
740
+ "learning_rate": 4.086208794500637e-05,
741
+ "logps_logqs/chosen": 0.07102300226688385,
742
+ "logqs/chosen": -0.7767165899276733,
743
+ "logqs/rejected": -0.9414197206497192,
744
+ "logqs_logps/rejected": 0.030909577384591103,
745
+ "loss": 0.1807,
746
+ "step": 660
747
+ },
748
+ {
749
+ "epoch": 1.0753335339552612,
750
+ "grad_norm": 0.494140625,
751
+ "learning_rate": 4.049842495284858e-05,
752
+ "logps_logqs/chosen": 0.06985093653202057,
753
+ "logqs/chosen": -0.7880310416221619,
754
+ "logqs/rejected": -0.9546613693237305,
755
+ "logqs_logps/rejected": 0.030261676758527756,
756
+ "loss": 0.186,
757
+ "step": 670
758
+ },
759
+ {
760
+ "epoch": 1.0913832881933996,
761
+ "grad_norm": 0.50390625,
762
+ "learning_rate": 4.012935537984414e-05,
763
+ "logps_logqs/chosen": 0.07058895379304886,
764
+ "logqs/chosen": -0.7935397624969482,
765
+ "logqs/rejected": -1.0117493867874146,
766
+ "logqs_logps/rejected": 0.029221097007393837,
767
+ "loss": 0.1835,
768
+ "step": 680
769
+ },
770
+ {
771
+ "epoch": 1.1074330424315377,
772
+ "grad_norm": 0.6171875,
773
+ "learning_rate": 3.9755007974849135e-05,
774
+ "logps_logqs/chosen": 0.06916572153568268,
775
+ "logqs/chosen": -0.7878638505935669,
776
+ "logqs/rejected": -0.9999720454216003,
777
+ "logqs_logps/rejected": 0.030744653195142746,
778
+ "loss": 0.1857,
779
+ "step": 690
780
+ },
781
+ {
782
+ "epoch": 1.123482796669676,
783
+ "grad_norm": 0.57421875,
784
+ "learning_rate": 3.93755133278762e-05,
785
+ "logps_logqs/chosen": 0.07014169543981552,
786
+ "logqs/chosen": -0.7882756590843201,
787
+ "logqs/rejected": -1.0001386404037476,
788
+ "logqs_logps/rejected": 0.02842717245221138,
789
+ "loss": 0.1832,
790
+ "step": 700
791
+ },
792
+ {
793
+ "epoch": 1.1395325509078142,
794
+ "grad_norm": 0.59375,
795
+ "learning_rate": 3.899100382453845e-05,
796
+ "logps_logqs/chosen": 0.0701603814959526,
797
+ "logqs/chosen": -0.7642520070075989,
798
+ "logqs/rejected": -0.9544415473937988,
799
+ "logqs_logps/rejected": 0.027573522180318832,
800
+ "loss": 0.1806,
801
+ "step": 710
802
+ },
803
+ {
804
+ "epoch": 1.1555823051459524,
805
+ "grad_norm": 0.50390625,
806
+ "learning_rate": 3.8601613599867156e-05,
807
+ "logps_logqs/chosen": 0.06890274584293365,
808
+ "logqs/chosen": -0.7708860635757446,
809
+ "logqs/rejected": -0.9537287950515747,
810
+ "logqs_logps/rejected": 0.028619807213544846,
811
+ "loss": 0.1804,
812
+ "step": 720
813
+ },
814
+ {
815
+ "epoch": 1.1716320593840908,
816
+ "grad_norm": 0.609375,
817
+ "learning_rate": 3.8207478491519216e-05,
818
+ "logps_logqs/chosen": 0.0682586207985878,
819
+ "logqs/chosen": -0.7696245908737183,
820
+ "logqs/rejected": -0.9157883524894714,
821
+ "logqs_logps/rejected": 0.028189942240715027,
822
+ "loss": 0.1814,
823
+ "step": 730
824
+ },
825
+ {
826
+ "epoch": 1.187681813622229,
827
+ "grad_norm": 0.59375,
828
+ "learning_rate": 3.780873599239044e-05,
829
+ "logps_logqs/chosen": 0.07058210670948029,
830
+ "logqs/chosen": -0.801421046257019,
831
+ "logqs/rejected": -0.9591751098632812,
832
+ "logqs_logps/rejected": 0.03063536249101162,
833
+ "loss": 0.1853,
834
+ "step": 740
835
+ },
836
+ {
837
+ "epoch": 1.203731567860367,
838
+ "grad_norm": 0.55859375,
839
+ "learning_rate": 3.740552520265167e-05,
840
+ "logps_logqs/chosen": 0.06886611133813858,
841
+ "logqs/chosen": -0.792157769203186,
842
+ "logqs/rejected": -0.9370824694633484,
843
+ "logqs_logps/rejected": 0.02984955906867981,
844
+ "loss": 0.1833,
845
+ "step": 750
846
+ },
847
+ {
848
+ "epoch": 1.2197813220985054,
849
+ "grad_norm": 0.55859375,
850
+ "learning_rate": 3.699798678122403e-05,
851
+ "logps_logqs/chosen": 0.07179007679224014,
852
+ "logqs/chosen": -0.7622597813606262,
853
+ "logqs/rejected": -0.9382703900337219,
854
+ "logqs_logps/rejected": 0.029157549142837524,
855
+ "loss": 0.1793,
856
+ "step": 760
857
+ },
858
+ {
859
+ "epoch": 1.2358310763366436,
860
+ "grad_norm": 0.546875,
861
+ "learning_rate": 3.6586262896710476e-05,
862
+ "logps_logqs/chosen": 0.06790535151958466,
863
+ "logqs/chosen": -0.7676440477371216,
864
+ "logqs/rejected": -0.928793728351593,
865
+ "logqs_logps/rejected": 0.03030979633331299,
866
+ "loss": 0.1808,
867
+ "step": 770
868
+ },
869
+ {
870
+ "epoch": 1.2518808305747817,
871
+ "grad_norm": 0.51171875,
872
+ "learning_rate": 3.61704971778007e-05,
873
+ "logps_logqs/chosen": 0.06736615300178528,
874
+ "logqs/chosen": -0.770586371421814,
875
+ "logqs/rejected": -0.9197053909301758,
876
+ "logqs_logps/rejected": 0.03245236724615097,
877
+ "loss": 0.1799,
878
+ "step": 780
879
+ },
880
+ {
881
+ "epoch": 1.26793058481292,
882
+ "grad_norm": 0.4765625,
883
+ "learning_rate": 3.575083466316664e-05,
884
+ "logps_logqs/chosen": 0.06956754624843597,
885
+ "logqs/chosen": -0.7774965167045593,
886
+ "logqs/rejected": -0.9702298045158386,
887
+ "logqs_logps/rejected": 0.030748773366212845,
888
+ "loss": 0.1831,
889
+ "step": 790
890
+ },
891
+ {
892
+ "epoch": 1.2839803390510582,
893
+ "grad_norm": 0.5078125,
894
+ "learning_rate": 3.532742175086621e-05,
895
+ "logps_logqs/chosen": 0.06920956075191498,
896
+ "logqs/chosen": -0.7709556818008423,
897
+ "logqs/rejected": -0.9387216567993164,
898
+ "logqs_logps/rejected": 0.027966167777776718,
899
+ "loss": 0.1788,
900
+ "step": 800
901
+ },
902
+ {
903
+ "epoch": 1.3000300932891964,
904
+ "grad_norm": 0.54296875,
905
+ "learning_rate": 3.490040614727272e-05,
906
+ "logps_logqs/chosen": 0.06927359104156494,
907
+ "logqs/chosen": -0.7772814035415649,
908
+ "logqs/rejected": -0.9691821932792664,
909
+ "logqs_logps/rejected": 0.028416061773896217,
910
+ "loss": 0.1781,
911
+ "step": 810
912
+ },
913
+ {
914
+ "epoch": 1.3160798475273348,
915
+ "grad_norm": 0.5078125,
916
+ "learning_rate": 3.446993681554797e-05,
917
+ "logps_logqs/chosen": 0.07202474772930145,
918
+ "logqs/chosen": -0.7855108380317688,
919
+ "logqs/rejected": -0.9708096385002136,
920
+ "logqs_logps/rejected": 0.030036652460694313,
921
+ "loss": 0.1793,
922
+ "step": 820
923
+ },
924
+ {
925
+ "epoch": 1.332129601765473,
926
+ "grad_norm": 0.65234375,
927
+ "learning_rate": 3.403616392367681e-05,
928
+ "logps_logqs/chosen": 0.07054628431797028,
929
+ "logqs/chosen": -0.7540086507797241,
930
+ "logqs/rejected": -0.9042137265205383,
931
+ "logqs_logps/rejected": 0.03296629339456558,
932
+ "loss": 0.1818,
933
+ "step": 830
934
+ },
935
+ {
936
+ "epoch": 1.3481793560036113,
937
+ "grad_norm": 0.5078125,
938
+ "learning_rate": 3.3599238792081575e-05,
939
+ "logps_logqs/chosen": 0.07161605358123779,
940
+ "logqs/chosen": -0.7662056684494019,
941
+ "logqs/rejected": -0.9423080682754517,
942
+ "logqs_logps/rejected": 0.031152984127402306,
943
+ "loss": 0.1786,
944
+ "step": 840
945
+ },
946
+ {
947
+ "epoch": 1.3642291102417494,
948
+ "grad_norm": 0.5390625,
949
+ "learning_rate": 3.315931384083431e-05,
950
+ "logps_logqs/chosen": 0.0672772079706192,
951
+ "logqs/chosen": -0.7698653936386108,
952
+ "logqs/rejected": -0.9608929753303528,
953
+ "logqs_logps/rejected": 0.031727343797683716,
954
+ "loss": 0.181,
955
+ "step": 850
956
+ },
957
+ {
958
+ "epoch": 1.3802788644798878,
959
+ "grad_norm": 0.5390625,
960
+ "learning_rate": 3.2716542536485474e-05,
961
+ "logps_logqs/chosen": 0.07064563035964966,
962
+ "logqs/chosen": -0.7671880125999451,
963
+ "logqs/rejected": -0.9258206486701965,
964
+ "logqs_logps/rejected": 0.03185782581567764,
965
+ "loss": 0.1802,
966
+ "step": 860
967
+ },
968
+ {
969
+ "epoch": 1.396328618718026,
970
+ "grad_norm": 0.59375,
971
+ "learning_rate": 3.2271079338527626e-05,
972
+ "logps_logqs/chosen": 0.07001273334026337,
973
+ "logqs/chosen": -0.7942059636116028,
974
+ "logqs/rejected": -0.9803145527839661,
975
+ "logqs_logps/rejected": 0.02900245226919651,
976
+ "loss": 0.1783,
977
+ "step": 870
978
+ },
979
+ {
980
+ "epoch": 1.412378372956164,
981
+ "grad_norm": 0.48828125,
982
+ "learning_rate": 3.1823079645512655e-05,
983
+ "logps_logqs/chosen": 0.06748739629983902,
984
+ "logqs/chosen": -0.7796913385391235,
985
+ "logqs/rejected": -1.0028189420700073,
986
+ "logqs_logps/rejected": 0.028837621212005615,
987
+ "loss": 0.1791,
988
+ "step": 880
989
+ },
990
+ {
991
+ "epoch": 1.4284281271943025,
992
+ "grad_norm": 0.5234375,
993
+ "learning_rate": 3.137269974084139e-05,
994
+ "logps_logqs/chosen": 0.07031874358654022,
995
+ "logqs/chosen": -0.7788208723068237,
996
+ "logqs/rejected": -0.9545127153396606,
997
+ "logqs_logps/rejected": 0.029628973454236984,
998
+ "loss": 0.179,
999
+ "step": 890
1000
+ },
1001
+ {
1002
+ "epoch": 1.4444778814324406,
1003
+ "grad_norm": 0.486328125,
1004
+ "learning_rate": 3.092009673824469e-05,
1005
+ "logps_logqs/chosen": 0.07582792639732361,
1006
+ "logqs/chosen": -0.8078464269638062,
1007
+ "logqs/rejected": -0.9633975028991699,
1008
+ "logqs_logps/rejected": 0.02939186617732048,
1009
+ "loss": 0.1885,
1010
+ "step": 900
1011
+ },
1012
+ {
1013
+ "epoch": 1.4605276356705787,
1014
+ "grad_norm": 0.53515625,
1015
+ "learning_rate": 3.0465428526974665e-05,
1016
+ "logps_logqs/chosen": 0.07225798070430756,
1017
+ "logqs/chosen": -0.7663235068321228,
1018
+ "logqs/rejected": -0.9817646741867065,
1019
+ "logqs_logps/rejected": 0.028154581785202026,
1020
+ "loss": 0.178,
1021
+ "step": 910
1022
+ },
1023
+ {
1024
+ "epoch": 1.4765773899087171,
1025
+ "grad_norm": 0.49609375,
1026
+ "learning_rate": 3.000885371672554e-05,
1027
+ "logps_logqs/chosen": 0.07049473375082016,
1028
+ "logqs/chosen": -0.7796770930290222,
1029
+ "logqs/rejected": -0.9203440546989441,
1030
+ "logqs_logps/rejected": 0.028858328238129616,
1031
+ "loss": 0.1765,
1032
+ "step": 920
1033
+ },
1034
+ {
1035
+ "epoch": 1.4926271441468553,
1036
+ "grad_norm": 0.53125,
1037
+ "learning_rate": 2.9550531582303082e-05,
1038
+ "logps_logqs/chosen": 0.07375530898571014,
1039
+ "logqs/chosen": -0.7871274352073669,
1040
+ "logqs/rejected": -0.950308620929718,
1041
+ "logqs_logps/rejected": 0.030121903866529465,
1042
+ "loss": 0.1821,
1043
+ "step": 930
1044
+ },
1045
+ {
1046
+ "epoch": 1.5086768983849934,
1047
+ "grad_norm": 0.462890625,
1048
+ "learning_rate": 2.909062200806208e-05,
1049
+ "logps_logqs/chosen": 0.06755580753087997,
1050
+ "logqs/chosen": -0.7797432541847229,
1051
+ "logqs/rejected": -0.9579198956489563,
1052
+ "logqs_logps/rejected": 0.027673590928316116,
1053
+ "loss": 0.1738,
1054
+ "step": 940
1055
+ },
1056
+ {
1057
+ "epoch": 1.5247266526231318,
1058
+ "grad_norm": 0.451171875,
1059
+ "learning_rate": 2.8629285432131083e-05,
1060
+ "logps_logqs/chosen": 0.06972555816173553,
1061
+ "logqs/chosen": -0.7843119502067566,
1062
+ "logqs/rejected": -0.971518874168396,
1063
+ "logqs_logps/rejected": 0.026922276243567467,
1064
+ "loss": 0.1771,
1065
+ "step": 950
1066
+ },
1067
+ {
1068
+ "epoch": 1.54077640686127,
1069
+ "grad_norm": 0.5234375,
1070
+ "learning_rate": 2.8166682790444116e-05,
1071
+ "logps_logqs/chosen": 0.0729864090681076,
1072
+ "logqs/chosen": -0.7914843559265137,
1073
+ "logqs/rejected": -0.9729417562484741,
1074
+ "logqs_logps/rejected": 0.02641429379582405,
1075
+ "loss": 0.1788,
1076
+ "step": 960
1077
+ },
1078
+ {
1079
+ "epoch": 1.556826161099408,
1080
+ "grad_norm": 0.4921875,
1081
+ "learning_rate": 2.7702975460598547e-05,
1082
+ "logps_logqs/chosen": 0.06970744580030441,
1083
+ "logqs/chosen": -0.7867680788040161,
1084
+ "logqs/rejected": -0.9702849388122559,
1085
+ "logqs_logps/rejected": 0.026300692930817604,
1086
+ "loss": 0.1753,
1087
+ "step": 970
1088
+ },
1089
+ {
1090
+ "epoch": 1.5728759153375464,
1091
+ "grad_norm": 0.51171875,
1092
+ "learning_rate": 2.723832520555905e-05,
1093
+ "logps_logqs/chosen": 0.06844428926706314,
1094
+ "logqs/chosen": -0.7796690464019775,
1095
+ "logqs/rejected": -0.9721530079841614,
1096
+ "logqs_logps/rejected": 0.03003394976258278,
1097
+ "loss": 0.1775,
1098
+ "step": 980
1099
+ },
1100
+ {
1101
+ "epoch": 1.5889256695756846,
1102
+ "grad_norm": 0.50390625,
1103
+ "learning_rate": 2.677289411722702e-05,
1104
+ "logps_logqs/chosen": 0.06915868073701859,
1105
+ "logqs/chosen": -0.7836161851882935,
1106
+ "logqs/rejected": -1.0066392421722412,
1107
+ "logqs_logps/rejected": 0.029805105179548264,
1108
+ "loss": 0.1834,
1109
+ "step": 990
1110
+ },
1111
+ {
1112
+ "epoch": 1.6049754238138227,
1113
+ "grad_norm": 0.52734375,
1114
+ "learning_rate": 2.63068445598953e-05,
1115
+ "logps_logqs/chosen": 0.07069100439548492,
1116
+ "logqs/chosen": -0.7810501456260681,
1117
+ "logqs/rejected": -0.9559534788131714,
1118
+ "logqs_logps/rejected": 0.02675846591591835,
1119
+ "loss": 0.1799,
1120
+ "step": 1000
1121
+ },
1122
+ {
1123
+ "epoch": 1.6210251780519611,
1124
+ "grad_norm": 0.455078125,
1125
+ "learning_rate": 2.5840339113607854e-05,
1126
+ "logps_logqs/chosen": 0.06957536935806274,
1127
+ "logqs/chosen": -0.776276171207428,
1128
+ "logqs/rejected": -0.9653003811836243,
1129
+ "logqs_logps/rejected": 0.029132988303899765,
1130
+ "loss": 0.1764,
1131
+ "step": 1010
1132
+ },
1133
+ {
1134
+ "epoch": 1.6370749322900993,
1135
+ "grad_norm": 0.4921875,
1136
+ "learning_rate": 2.53735405174442e-05,
1137
+ "logps_logqs/chosen": 0.06984798610210419,
1138
+ "logqs/chosen": -0.7904434204101562,
1139
+ "logqs/rejected": -0.9736318588256836,
1140
+ "logqs_logps/rejected": 0.02975938282907009,
1141
+ "loss": 0.1778,
1142
+ "step": 1020
1143
+ },
1144
+ {
1145
+ "epoch": 1.6531246865282374,
1146
+ "grad_norm": 0.50390625,
1147
+ "learning_rate": 2.490661161274835e-05,
1148
+ "logps_logqs/chosen": 0.06985798478126526,
1149
+ "logqs/chosen": -0.7864385843276978,
1150
+ "logqs/rejected": -0.9617294073104858,
1151
+ "logqs_logps/rejected": 0.027367640286684036,
1152
+ "loss": 0.1774,
1153
+ "step": 1030
1154
+ },
1155
+ {
1156
+ "epoch": 1.6691744407663758,
1157
+ "grad_norm": 0.4453125,
1158
+ "learning_rate": 2.443971528632205e-05,
1159
+ "logps_logqs/chosen": 0.07047738134860992,
1160
+ "logqs/chosen": -0.7980072498321533,
1161
+ "logqs/rejected": -0.9655786752700806,
1162
+ "logqs_logps/rejected": 0.028297554701566696,
1163
+ "loss": 0.1779,
1164
+ "step": 1040
1165
+ },
1166
+ {
1167
+ "epoch": 1.6852241950045141,
1168
+ "grad_norm": 0.515625,
1169
+ "learning_rate": 2.3973014413602238e-05,
1170
+ "logps_logqs/chosen": 0.07119600474834442,
1171
+ "logqs/chosen": -0.7848029732704163,
1172
+ "logqs/rejected": -1.003758192062378,
1173
+ "logqs_logps/rejected": 0.029630709439516068,
1174
+ "loss": 0.1808,
1175
+ "step": 1050
1176
+ },
1177
+ {
1178
+ "epoch": 1.701273949242652,
1179
+ "grad_norm": 0.498046875,
1180
+ "learning_rate": 2.3506671801842364e-05,
1181
+ "logps_logqs/chosen": 0.06773122400045395,
1182
+ "logqs/chosen": -0.8044145703315735,
1183
+ "logqs/rejected": -0.9743335843086243,
1184
+ "logqs_logps/rejected": 0.027200300246477127,
1185
+ "loss": 0.1769,
1186
+ "step": 1060
1187
+ },
1188
+ {
1189
+ "epoch": 1.7173237034807904,
1190
+ "grad_norm": 0.4921875,
1191
+ "learning_rate": 2.3040850133317597e-05,
1192
+ "logps_logqs/chosen": 0.07197652757167816,
1193
+ "logqs/chosen": -0.7732763886451721,
1194
+ "logqs/rejected": -0.9615533947944641,
1195
+ "logqs_logps/rejected": 0.02922072447836399,
1196
+ "loss": 0.1801,
1197
+ "step": 1070
1198
+ },
1199
+ {
1200
+ "epoch": 1.7333734577189288,
1201
+ "grad_norm": 0.48046875,
1202
+ "learning_rate": 2.2575711908573548e-05,
1203
+ "logps_logqs/chosen": 0.06941990554332733,
1204
+ "logqs/chosen": -0.7937701940536499,
1205
+ "logqs/rejected": -1.0073679685592651,
1206
+ "logqs_logps/rejected": 0.026350444182753563,
1207
+ "loss": 0.1764,
1208
+ "step": 1080
1209
+ },
1210
+ {
1211
+ "epoch": 1.749423211957067,
1212
+ "grad_norm": 0.447265625,
1213
+ "learning_rate": 2.2111419389738435e-05,
1214
+ "logps_logqs/chosen": 0.07039657980203629,
1215
+ "logqs/chosen": -0.7764157056808472,
1216
+ "logqs/rejected": -0.9544513821601868,
1217
+ "logqs_logps/rejected": 0.02772611379623413,
1218
+ "loss": 0.1755,
1219
+ "step": 1090
1220
+ },
1221
+ {
1222
+ "epoch": 1.765472966195205,
1223
+ "grad_norm": 0.5390625,
1224
+ "learning_rate": 2.1648134543918423e-05,
1225
+ "logps_logqs/chosen": 0.07292209565639496,
1226
+ "logqs/chosen": -0.7701107859611511,
1227
+ "logqs/rejected": -0.9618105888366699,
1228
+ "logqs_logps/rejected": 0.027484769001603127,
1229
+ "loss": 0.1762,
1230
+ "step": 1100
1231
+ },
1232
+ {
1233
+ "epoch": 1.7815227204333435,
1234
+ "grad_norm": 0.427734375,
1235
+ "learning_rate": 2.1186018986695842e-05,
1236
+ "logps_logqs/chosen": 0.07149146497249603,
1237
+ "logqs/chosen": -0.7753132581710815,
1238
+ "logqs/rejected": -0.926436722278595,
1239
+ "logqs_logps/rejected": 0.029845744371414185,
1240
+ "loss": 0.1779,
1241
+ "step": 1110
1242
+ },
1243
+ {
1244
+ "epoch": 1.7975724746714816,
1245
+ "grad_norm": 0.45703125,
1246
+ "learning_rate": 2.0725233925750063e-05,
1247
+ "logps_logqs/chosen": 0.06752609461545944,
1248
+ "logqs/chosen": -0.7796769738197327,
1249
+ "logqs/rejected": -0.9575828313827515,
1250
+ "logqs_logps/rejected": 0.03261734917759895,
1251
+ "loss": 0.1771,
1252
+ "step": 1120
1253
+ },
1254
+ {
1255
+ "epoch": 1.8136222289096198,
1256
+ "grad_norm": 0.54296875,
1257
+ "learning_rate": 2.026594010462068e-05,
1258
+ "logps_logqs/chosen": 0.07001613825559616,
1259
+ "logqs/chosen": -0.7918448448181152,
1260
+ "logqs/rejected": -0.9749159812927246,
1261
+ "logqs_logps/rejected": 0.028455784544348717,
1262
+ "loss": 0.1789,
1263
+ "step": 1130
1264
+ },
1265
+ {
1266
+ "epoch": 1.8296719831477581,
1267
+ "grad_norm": 0.478515625,
1268
+ "learning_rate": 1.980829774663256e-05,
1269
+ "logps_logqs/chosen": 0.07289810478687286,
1270
+ "logqs/chosen": -0.7838630676269531,
1271
+ "logqs/rejected": -0.9698160290718079,
1272
+ "logqs_logps/rejected": 0.02906595729291439,
1273
+ "loss": 0.1794,
1274
+ "step": 1140
1275
+ },
1276
+ {
1277
+ "epoch": 1.8457217373858963,
1278
+ "grad_norm": 0.458984375,
1279
+ "learning_rate": 1.9352466499002422e-05,
1280
+ "logps_logqs/chosen": 0.0761161744594574,
1281
+ "logqs/chosen": -0.7819662094116211,
1282
+ "logqs/rejected": -0.9463040232658386,
1283
+ "logqs_logps/rejected": 0.028005197644233704,
1284
+ "loss": 0.183,
1285
+ "step": 1150
1286
+ },
1287
+ {
1288
+ "epoch": 1.8617714916240344,
1289
+ "grad_norm": 0.474609375,
1290
+ "learning_rate": 1.8898605377146383e-05,
1291
+ "logps_logqs/chosen": 0.06957074254751205,
1292
+ "logqs/chosen": -0.7857328057289124,
1293
+ "logqs/rejected": -0.9639459848403931,
1294
+ "logqs_logps/rejected": 0.027649903669953346,
1295
+ "loss": 0.1753,
1296
+ "step": 1160
1297
+ },
1298
+ {
1299
+ "epoch": 1.8778212458621728,
1300
+ "grad_norm": 0.48046875,
1301
+ "learning_rate": 1.8446872709207847e-05,
1302
+ "logps_logqs/chosen": 0.07362545281648636,
1303
+ "logqs/chosen": -0.7835830450057983,
1304
+ "logqs/rejected": -1.0045692920684814,
1305
+ "logqs_logps/rejected": 0.027295967563986778,
1306
+ "loss": 0.1806,
1307
+ "step": 1170
1308
+ },
1309
+ {
1310
+ "epoch": 1.893871000100311,
1311
+ "grad_norm": 0.51171875,
1312
+ "learning_rate": 1.799742608082519e-05,
1313
+ "logps_logqs/chosen": 0.07236044853925705,
1314
+ "logqs/chosen": -0.7885088324546814,
1315
+ "logqs/rejected": -0.9487521052360535,
1316
+ "logqs_logps/rejected": 0.026073191314935684,
1317
+ "loss": 0.1723,
1318
+ "step": 1180
1319
+ },
1320
+ {
1321
+ "epoch": 1.909920754338449,
1322
+ "grad_norm": 0.51171875,
1323
+ "learning_rate": 1.7550422280158513e-05,
1324
+ "logps_logqs/chosen": 0.0707259327173233,
1325
+ "logqs/chosen": -0.7704340815544128,
1326
+ "logqs/rejected": -0.9094502329826355,
1327
+ "logqs_logps/rejected": 0.027560651302337646,
1328
+ "loss": 0.1758,
1329
+ "step": 1190
1330
+ },
1331
+ {
1332
+ "epoch": 1.9259705085765875,
1333
+ "grad_norm": 0.435546875,
1334
+ "learning_rate": 1.7106017243194487e-05,
1335
+ "logps_logqs/chosen": 0.07020822167396545,
1336
+ "logqs/chosen": -0.7692683935165405,
1337
+ "logqs/rejected": -0.9656769037246704,
1338
+ "logqs_logps/rejected": 0.02792223170399666,
1339
+ "loss": 0.177,
1340
+ "step": 1200
1341
+ },
1342
+ {
1343
+ "epoch": 1.9420202628147256,
1344
+ "grad_norm": 0.53515625,
1345
+ "learning_rate": 1.6664365999348594e-05,
1346
+ "logps_logqs/chosen": 0.06943775713443756,
1347
+ "logqs/chosen": -0.7688643932342529,
1348
+ "logqs/rejected": -0.9713398218154907,
1349
+ "logqs_logps/rejected": 0.026859009638428688,
1350
+ "loss": 0.1748,
1351
+ "step": 1210
1352
+ },
1353
+ {
1354
+ "epoch": 1.9580700170528638,
1355
+ "grad_norm": 0.51953125,
1356
+ "learning_rate": 1.6225622617383494e-05,
1357
+ "logps_logqs/chosen": 0.07070201635360718,
1358
+ "logqs/chosen": -0.7839328050613403,
1359
+ "logqs/rejected": -0.9712308645248413,
1360
+ "logqs_logps/rejected": 0.028167420998215675,
1361
+ "loss": 0.1763,
1362
+ "step": 1220
1363
+ },
1364
+ {
1365
+ "epoch": 1.9741197712910021,
1366
+ "grad_norm": 0.4453125,
1367
+ "learning_rate": 1.578994015166263e-05,
1368
+ "logps_logqs/chosen": 0.07289667427539825,
1369
+ "logqs/chosen": -0.7838398814201355,
1370
+ "logqs/rejected": -0.9522368311882019,
1371
+ "logqs_logps/rejected": 0.02768387272953987,
1372
+ "loss": 0.1773,
1373
+ "step": 1230
1374
+ },
1375
+ {
1376
+ "epoch": 1.9901695255291403,
1377
+ "grad_norm": 0.53515625,
1378
+ "learning_rate": 1.535747058875765e-05,
1379
+ "logps_logqs/chosen": 0.06881529092788696,
1380
+ "logqs/chosen": -0.790834903717041,
1381
+ "logqs/rejected": -0.9497320055961609,
1382
+ "logqs_logps/rejected": 0.029328888282179832,
1383
+ "loss": 0.1734,
1384
+ "step": 1240
1385
+ }
1386
+ ],
1387
+ "logging_steps": 10,
1388
+ "max_steps": 1869,
1389
+ "num_input_tokens_seen": 0,
1390
+ "num_train_epochs": 3,
1391
+ "save_steps": 500,
1392
+ "stateful_callbacks": {
1393
+ "TrainerControl": {
1394
+ "args": {
1395
+ "should_epoch_stop": false,
1396
+ "should_evaluate": false,
1397
+ "should_log": false,
1398
+ "should_save": true,
1399
+ "should_training_stop": false
1400
+ },
1401
+ "attributes": {}
1402
+ }
1403
+ },
1404
+ "total_flos": 0.0,
1405
+ "train_batch_size": 2,
1406
+ "trial_name": null,
1407
+ "trial_params": null
1408
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1246/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b9fa66895bee5a36a6f750ef897f491a1113f2f79af77c75bdef8187ae10031
3
+ size 6840
qwen2.5-1.5B-it-distillm2/checkpoint-1246/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5-1.5B-it-distillm2/checkpoint-1869/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1869/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.45.2",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 152064
29
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1869/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.45.2"
14
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1869/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5-1.5B-it-distillm2/checkpoint-1869/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa1f325bd96a7e02353ee13f20a8362121741466a9b362e7cfd122a67c5ae6e0
3
+ size 3087935634
qwen2.5-1.5B-it-distillm2/checkpoint-1869/special_tokens_map.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "bos_token": "<|endoftext|>",
18
+ "eos_token": {
19
+ "content": "<|im_end|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<|endoftext|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ }
32
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1869/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
qwen2.5-1.5B-it-distillm2/checkpoint-1869/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": "<|endoftext|>",
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 2048,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1869/trainer_state.json ADDED
@@ -0,0 +1,2090 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.999699067108035,
5
+ "eval_steps": 10000000,
6
+ "global_step": 1869,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0016049754238138228,
13
+ "grad_norm": 7.15625,
14
+ "learning_rate": 2.6737967914438503e-07,
15
+ "logps_logqs/chosen": 0.057332463562488556,
16
+ "logqs/chosen": -0.752794086933136,
17
+ "logqs/rejected": -0.7512239813804626,
18
+ "logqs_logps/rejected": 0.1236220970749855,
19
+ "loss": 0.4632,
20
+ "step": 1
21
+ },
22
+ {
23
+ "epoch": 0.016049754238138227,
24
+ "grad_norm": 5.71875,
25
+ "learning_rate": 2.6737967914438504e-06,
26
+ "logps_logqs/chosen": 0.047926291823387146,
27
+ "logqs/chosen": -0.7439535856246948,
28
+ "logqs/rejected": -0.7035253643989563,
29
+ "logqs_logps/rejected": 0.13991409540176392,
30
+ "loss": 0.4557,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.032099508476276455,
35
+ "grad_norm": 1.9296875,
36
+ "learning_rate": 5.347593582887701e-06,
37
+ "logps_logqs/chosen": 0.03161158040165901,
38
+ "logqs/chosen": -0.6941366195678711,
39
+ "logqs/rejected": -0.6452130079269409,
40
+ "logqs_logps/rejected": 0.1449585109949112,
41
+ "loss": 0.4015,
42
+ "step": 20
43
+ },
44
+ {
45
+ "epoch": 0.048149262714414685,
46
+ "grad_norm": 1.8515625,
47
+ "learning_rate": 8.02139037433155e-06,
48
+ "logps_logqs/chosen": 0.03457744047045708,
49
+ "logqs/chosen": -0.6867337226867676,
50
+ "logqs/rejected": -0.6923023462295532,
51
+ "logqs_logps/rejected": 0.12560859322547913,
52
+ "loss": 0.3565,
53
+ "step": 30
54
+ },
55
+ {
56
+ "epoch": 0.06419901695255291,
57
+ "grad_norm": 1.234375,
58
+ "learning_rate": 1.0695187165775402e-05,
59
+ "logps_logqs/chosen": 0.05277745798230171,
60
+ "logqs/chosen": -0.7405093908309937,
61
+ "logqs/rejected": -0.7894801497459412,
62
+ "logqs_logps/rejected": 0.0936320573091507,
63
+ "loss": 0.3228,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 0.08024877119069114,
68
+ "grad_norm": 1.0,
69
+ "learning_rate": 1.3368983957219252e-05,
70
+ "logps_logqs/chosen": 0.04658779874444008,
71
+ "logqs/chosen": -0.7268352508544922,
72
+ "logqs/rejected": -0.7926934361457825,
73
+ "logqs_logps/rejected": 0.09889288991689682,
74
+ "loss": 0.3027,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 0.09629852542882937,
79
+ "grad_norm": 0.92578125,
80
+ "learning_rate": 1.60427807486631e-05,
81
+ "logps_logqs/chosen": 0.04193533584475517,
82
+ "logqs/chosen": -0.7151986956596375,
83
+ "logqs/rejected": -0.7687807083129883,
84
+ "logqs_logps/rejected": 0.09696364402770996,
85
+ "loss": 0.2856,
86
+ "step": 60
87
+ },
88
+ {
89
+ "epoch": 0.1123482796669676,
90
+ "grad_norm": 1.0625,
91
+ "learning_rate": 1.8716577540106954e-05,
92
+ "logps_logqs/chosen": 0.04730736091732979,
93
+ "logqs/chosen": -0.7385339736938477,
94
+ "logqs/rejected": -0.8247607946395874,
95
+ "logqs_logps/rejected": 0.08417234569787979,
96
+ "loss": 0.285,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 0.12839803390510582,
101
+ "grad_norm": 0.890625,
102
+ "learning_rate": 2.1390374331550803e-05,
103
+ "logps_logqs/chosen": 0.047996845096349716,
104
+ "logqs/chosen": -0.7431933879852295,
105
+ "logqs/rejected": -0.8130408525466919,
106
+ "logqs_logps/rejected": 0.08468352258205414,
107
+ "loss": 0.2797,
108
+ "step": 80
109
+ },
110
+ {
111
+ "epoch": 0.14444778814324405,
112
+ "grad_norm": 0.79296875,
113
+ "learning_rate": 2.4064171122994652e-05,
114
+ "logps_logqs/chosen": 0.04765843600034714,
115
+ "logqs/chosen": -0.7173298001289368,
116
+ "logqs/rejected": -0.7709535360336304,
117
+ "logqs_logps/rejected": 0.08819471299648285,
118
+ "loss": 0.2705,
119
+ "step": 90
120
+ },
121
+ {
122
+ "epoch": 0.16049754238138228,
123
+ "grad_norm": 0.82421875,
124
+ "learning_rate": 2.6737967914438505e-05,
125
+ "logps_logqs/chosen": 0.040474437177181244,
126
+ "logqs/chosen": -0.710905909538269,
127
+ "logqs/rejected": -0.7959136962890625,
128
+ "logqs_logps/rejected": 0.09612198173999786,
129
+ "loss": 0.2683,
130
+ "step": 100
131
+ },
132
+ {
133
+ "epoch": 0.1765472966195205,
134
+ "grad_norm": 0.7109375,
135
+ "learning_rate": 2.9411764705882354e-05,
136
+ "logps_logqs/chosen": 0.020066609606146812,
137
+ "logqs/chosen": -0.6621483564376831,
138
+ "logqs/rejected": -0.7314690351486206,
139
+ "logqs_logps/rejected": 0.11639375984668732,
140
+ "loss": 0.2682,
141
+ "step": 110
142
+ },
143
+ {
144
+ "epoch": 0.19259705085765874,
145
+ "grad_norm": 0.9921875,
146
+ "learning_rate": 3.20855614973262e-05,
147
+ "logps_logqs/chosen": 0.017146889120340347,
148
+ "logqs/chosen": -0.6599355340003967,
149
+ "logqs/rejected": -0.7124370336532593,
150
+ "logqs_logps/rejected": 0.12015531212091446,
151
+ "loss": 0.268,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.20864680509579697,
156
+ "grad_norm": 1.78125,
157
+ "learning_rate": 3.4759358288770055e-05,
158
+ "logps_logqs/chosen": 0.0369986966252327,
159
+ "logqs/chosen": -0.7184829115867615,
160
+ "logqs/rejected": -0.8139392137527466,
161
+ "logqs_logps/rejected": 0.09705157577991486,
162
+ "loss": 0.2611,
163
+ "step": 130
164
+ },
165
+ {
166
+ "epoch": 0.2246965593339352,
167
+ "grad_norm": 0.87890625,
168
+ "learning_rate": 3.743315508021391e-05,
169
+ "logps_logqs/chosen": 0.07482504099607468,
170
+ "logqs/chosen": -0.7948209643363953,
171
+ "logqs/rejected": -0.9149974584579468,
172
+ "logqs_logps/rejected": 0.04995354264974594,
173
+ "loss": 0.2442,
174
+ "step": 140
175
+ },
176
+ {
177
+ "epoch": 0.24074631357207343,
178
+ "grad_norm": 0.796875,
179
+ "learning_rate": 4.0106951871657754e-05,
180
+ "logps_logqs/chosen": 0.08310296386480331,
181
+ "logqs/chosen": -0.812109649181366,
182
+ "logqs/rejected": -0.9168623089790344,
183
+ "logqs_logps/rejected": 0.041201137006282806,
184
+ "loss": 0.2403,
185
+ "step": 150
186
+ },
187
+ {
188
+ "epoch": 0.25679606781021164,
189
+ "grad_norm": 0.70703125,
190
+ "learning_rate": 4.2780748663101606e-05,
191
+ "logps_logqs/chosen": 0.08498374372720718,
192
+ "logqs/chosen": -0.8101779818534851,
193
+ "logqs/rejected": -0.9302076101303101,
194
+ "logqs_logps/rejected": 0.03671664744615555,
195
+ "loss": 0.2336,
196
+ "step": 160
197
+ },
198
+ {
199
+ "epoch": 0.2728458220483499,
200
+ "grad_norm": 0.85546875,
201
+ "learning_rate": 4.545454545454546e-05,
202
+ "logps_logqs/chosen": 0.08320538699626923,
203
+ "logqs/chosen": -0.8246332406997681,
204
+ "logqs/rejected": -0.9219290018081665,
205
+ "logqs_logps/rejected": 0.03701116889715195,
206
+ "loss": 0.2386,
207
+ "step": 170
208
+ },
209
+ {
210
+ "epoch": 0.2888955762864881,
211
+ "grad_norm": 0.77734375,
212
+ "learning_rate": 4.8128342245989304e-05,
213
+ "logps_logqs/chosen": 0.08179891854524612,
214
+ "logqs/chosen": -0.787845253944397,
215
+ "logqs/rejected": -0.9030144810676575,
216
+ "logqs_logps/rejected": 0.03777966648340225,
217
+ "loss": 0.2315,
218
+ "step": 180
219
+ },
220
+ {
221
+ "epoch": 0.30494533052462636,
222
+ "grad_norm": 0.7734375,
223
+ "learning_rate": 4.9999607536612036e-05,
224
+ "logps_logqs/chosen": 0.08169040083885193,
225
+ "logqs/chosen": -0.8232030868530273,
226
+ "logqs/rejected": -0.9288152456283569,
227
+ "logqs_logps/rejected": 0.039177440106868744,
228
+ "loss": 0.2389,
229
+ "step": 190
230
+ },
231
+ {
232
+ "epoch": 0.32099508476276456,
233
+ "grad_norm": 0.72265625,
234
+ "learning_rate": 4.9992630752499945e-05,
235
+ "logps_logqs/chosen": 0.08127471804618835,
236
+ "logqs/chosen": -0.8097829818725586,
237
+ "logqs/rejected": -0.9110867381095886,
238
+ "logqs_logps/rejected": 0.03667169809341431,
239
+ "loss": 0.2279,
240
+ "step": 200
241
+ },
242
+ {
243
+ "epoch": 0.3370448390009028,
244
+ "grad_norm": 0.71875,
245
+ "learning_rate": 4.997693536122969e-05,
246
+ "logps_logqs/chosen": 0.0788678377866745,
247
+ "logqs/chosen": -0.7994121313095093,
248
+ "logqs/rejected": -0.9303783178329468,
249
+ "logqs_logps/rejected": 0.037957318127155304,
250
+ "loss": 0.2291,
251
+ "step": 210
252
+ },
253
+ {
254
+ "epoch": 0.353094593239041,
255
+ "grad_norm": 0.7109375,
256
+ "learning_rate": 4.995252683809324e-05,
257
+ "logps_logqs/chosen": 0.08248866349458694,
258
+ "logqs/chosen": -0.8128089904785156,
259
+ "logqs/rejected": -0.9362344741821289,
260
+ "logqs_logps/rejected": 0.037949927151203156,
261
+ "loss": 0.229,
262
+ "step": 220
263
+ },
264
+ {
265
+ "epoch": 0.3691443474771793,
266
+ "grad_norm": 0.63671875,
267
+ "learning_rate": 4.9919413697933496e-05,
268
+ "logps_logqs/chosen": 0.0817771628499031,
269
+ "logqs/chosen": -0.7939696311950684,
270
+ "logqs/rejected": -0.9058554768562317,
271
+ "logqs_logps/rejected": 0.04094362258911133,
272
+ "loss": 0.2314,
273
+ "step": 230
274
+ },
275
+ {
276
+ "epoch": 0.3851941017153175,
277
+ "grad_norm": 0.71875,
278
+ "learning_rate": 4.987760749217389e-05,
279
+ "logps_logqs/chosen": 0.076979860663414,
280
+ "logqs/chosen": -0.7874332070350647,
281
+ "logqs/rejected": -0.9303415417671204,
282
+ "logqs_logps/rejected": 0.04022833704948425,
283
+ "loss": 0.2244,
284
+ "step": 240
285
+ },
286
+ {
287
+ "epoch": 0.4012438559534557,
288
+ "grad_norm": 0.83984375,
289
+ "learning_rate": 4.982712280478875e-05,
290
+ "logps_logqs/chosen": 0.07487889379262924,
291
+ "logqs/chosen": -0.7985113859176636,
292
+ "logqs/rejected": -0.9595896005630493,
293
+ "logqs_logps/rejected": 0.04083302244544029,
294
+ "loss": 0.2277,
295
+ "step": 250
296
+ },
297
+ {
298
+ "epoch": 0.41729361019159394,
299
+ "grad_norm": 0.6953125,
300
+ "learning_rate": 4.976797724721567e-05,
301
+ "logps_logqs/chosen": 0.07195514440536499,
302
+ "logqs/chosen": -0.787926971912384,
303
+ "logqs/rejected": -0.897616982460022,
304
+ "logqs_logps/rejected": 0.0396430678665638,
305
+ "loss": 0.2163,
306
+ "step": 260
307
+ },
308
+ {
309
+ "epoch": 0.43334336442973215,
310
+ "grad_norm": 0.7734375,
311
+ "learning_rate": 4.9700191452211806e-05,
312
+ "logps_logqs/chosen": 0.07640247046947479,
313
+ "logqs/chosen": -0.7960728406906128,
314
+ "logqs/rejected": -0.928481936454773,
315
+ "logqs_logps/rejected": 0.03928074985742569,
316
+ "loss": 0.2226,
317
+ "step": 270
318
+ },
319
+ {
320
+ "epoch": 0.4493931186678704,
321
+ "grad_norm": 0.72265625,
322
+ "learning_rate": 4.9623789066656276e-05,
323
+ "logps_logqs/chosen": 0.07622957229614258,
324
+ "logqs/chosen": -0.8077980279922485,
325
+ "logqs/rejected": -0.9034830927848816,
326
+ "logqs_logps/rejected": 0.046734608709812164,
327
+ "loss": 0.2256,
328
+ "step": 280
329
+ },
330
+ {
331
+ "epoch": 0.4654428729060086,
332
+ "grad_norm": 0.78515625,
333
+ "learning_rate": 4.953879674330093e-05,
334
+ "logps_logqs/chosen": 0.07244043052196503,
335
+ "logqs/chosen": -0.7900176644325256,
336
+ "logqs/rejected": -0.891532301902771,
337
+ "logqs_logps/rejected": 0.04387632757425308,
338
+ "loss": 0.217,
339
+ "step": 290
340
+ },
341
+ {
342
+ "epoch": 0.48149262714414687,
343
+ "grad_norm": 0.6796875,
344
+ "learning_rate": 4.944524413147263e-05,
345
+ "logps_logqs/chosen": 0.07289232313632965,
346
+ "logqs/chosen": -0.7856311798095703,
347
+ "logqs/rejected": -0.9099383354187012,
348
+ "logqs_logps/rejected": 0.03974480181932449,
349
+ "loss": 0.2173,
350
+ "step": 300
351
+ },
352
+ {
353
+ "epoch": 0.49754238138228507,
354
+ "grad_norm": 0.66796875,
355
+ "learning_rate": 4.934316386673022e-05,
356
+ "logps_logqs/chosen": 0.07063151150941849,
357
+ "logqs/chosen": -0.7815223932266235,
358
+ "logqs/rejected": -0.9079385995864868,
359
+ "logqs_logps/rejected": 0.041664548218250275,
360
+ "loss": 0.2126,
361
+ "step": 310
362
+ },
363
+ {
364
+ "epoch": 0.5135921356204233,
365
+ "grad_norm": 0.61328125,
366
+ "learning_rate": 4.923259155947964e-05,
367
+ "logps_logqs/chosen": 0.071230448782444,
368
+ "logqs/chosen": -0.8096257448196411,
369
+ "logqs/rejected": -0.9752206802368164,
370
+ "logqs_logps/rejected": 0.03939716890454292,
371
+ "loss": 0.2194,
372
+ "step": 320
373
+ },
374
+ {
375
+ "epoch": 0.5296418898585615,
376
+ "grad_norm": 0.6640625,
377
+ "learning_rate": 4.911356578255139e-05,
378
+ "logps_logqs/chosen": 0.07356056571006775,
379
+ "logqs/chosen": -0.7827819585800171,
380
+ "logqs/rejected": -0.9489104151725769,
381
+ "logqs_logps/rejected": 0.040141116827726364,
382
+ "loss": 0.2145,
383
+ "step": 330
384
+ },
385
+ {
386
+ "epoch": 0.5456916440966998,
387
+ "grad_norm": 0.7109375,
388
+ "learning_rate": 4.898612805774447e-05,
389
+ "logps_logqs/chosen": 0.07881536334753036,
390
+ "logqs/chosen": -0.8095147013664246,
391
+ "logqs/rejected": -0.9313008189201355,
392
+ "logqs_logps/rejected": 0.03702981770038605,
393
+ "loss": 0.2165,
394
+ "step": 340
395
+ },
396
+ {
397
+ "epoch": 0.561741398334838,
398
+ "grad_norm": 0.64453125,
399
+ "learning_rate": 4.885032284134165e-05,
400
+ "logps_logqs/chosen": 0.07632436603307724,
401
+ "logqs/chosen": -0.7979615926742554,
402
+ "logqs/rejected": -0.9368084073066711,
403
+ "logqs_logps/rejected": 0.03654230386018753,
404
+ "loss": 0.2113,
405
+ "step": 350
406
+ },
407
+ {
408
+ "epoch": 0.5777911525729762,
409
+ "grad_norm": 0.62109375,
410
+ "learning_rate": 4.8706197508600984e-05,
411
+ "logps_logqs/chosen": 0.07344283908605576,
412
+ "logqs/chosen": -0.8101444244384766,
413
+ "logqs/rejected": -0.9295485615730286,
414
+ "logqs_logps/rejected": 0.03901313990354538,
415
+ "loss": 0.2151,
416
+ "step": 360
417
+ },
418
+ {
419
+ "epoch": 0.5938409068111145,
420
+ "grad_norm": 0.62890625,
421
+ "learning_rate": 4.855380233722915e-05,
422
+ "logps_logqs/chosen": 0.07421533018350601,
423
+ "logqs/chosen": -0.7826108336448669,
424
+ "logqs/rejected": -0.9229670763015747,
425
+ "logqs_logps/rejected": 0.038314513862133026,
426
+ "loss": 0.2129,
427
+ "step": 370
428
+ },
429
+ {
430
+ "epoch": 0.6098906610492527,
431
+ "grad_norm": 0.62890625,
432
+ "learning_rate": 4.839319048984217e-05,
433
+ "logps_logqs/chosen": 0.07273373752832413,
434
+ "logqs/chosen": -0.7798442840576172,
435
+ "logqs/rejected": -0.9371780157089233,
436
+ "logqs_logps/rejected": 0.03713950887322426,
437
+ "loss": 0.2067,
438
+ "step": 380
439
+ },
440
+ {
441
+ "epoch": 0.6259404152873909,
442
+ "grad_norm": 0.62890625,
443
+ "learning_rate": 4.822441799541979e-05,
444
+ "logps_logqs/chosen": 0.07329441606998444,
445
+ "logqs/chosen": -0.8102380037307739,
446
+ "logqs/rejected": -0.95549476146698,
447
+ "logqs_logps/rejected": 0.036079905927181244,
448
+ "loss": 0.2158,
449
+ "step": 390
450
+ },
451
+ {
452
+ "epoch": 0.6419901695255291,
453
+ "grad_norm": 0.66796875,
454
+ "learning_rate": 4.8047543729759936e-05,
455
+ "logps_logqs/chosen": 0.07404083013534546,
456
+ "logqs/chosen": -0.7847949266433716,
457
+ "logqs/rejected": -0.9250528216362,
458
+ "logqs_logps/rejected": 0.036556728184223175,
459
+ "loss": 0.2105,
460
+ "step": 400
461
+ },
462
+ {
463
+ "epoch": 0.6580399237636674,
464
+ "grad_norm": 0.69140625,
465
+ "learning_rate": 4.786262939494007e-05,
466
+ "logps_logqs/chosen": 0.07539906352758408,
467
+ "logqs/chosen": -0.8072575330734253,
468
+ "logqs/rejected": -0.9661371111869812,
469
+ "logqs_logps/rejected": 0.03662776201963425,
470
+ "loss": 0.2154,
471
+ "step": 410
472
+ },
473
+ {
474
+ "epoch": 0.6740896780018056,
475
+ "grad_norm": 0.66015625,
476
+ "learning_rate": 4.766973949779261e-05,
477
+ "logps_logqs/chosen": 0.0744672566652298,
478
+ "logqs/chosen": -0.787712574005127,
479
+ "logqs/rejected": -0.9350829124450684,
480
+ "logqs_logps/rejected": 0.0353250689804554,
481
+ "loss": 0.2074,
482
+ "step": 420
483
+ },
484
+ {
485
+ "epoch": 0.6901394322399438,
486
+ "grad_norm": 0.62890625,
487
+ "learning_rate": 4.746894132740186e-05,
488
+ "logps_logqs/chosen": 0.07364196330308914,
489
+ "logqs/chosen": -0.7813644409179688,
490
+ "logqs/rejected": -0.9208289980888367,
491
+ "logqs_logps/rejected": 0.0356430858373642,
492
+ "loss": 0.2053,
493
+ "step": 430
494
+ },
495
+ {
496
+ "epoch": 0.706189186478082,
497
+ "grad_norm": 0.56640625,
498
+ "learning_rate": 4.726030493163044e-05,
499
+ "logps_logqs/chosen": 0.07587061077356339,
500
+ "logqs/chosen": -0.7853146195411682,
501
+ "logqs/rejected": -0.8944910168647766,
502
+ "logqs_logps/rejected": 0.03531279042363167,
503
+ "loss": 0.2078,
504
+ "step": 440
505
+ },
506
+ {
507
+ "epoch": 0.7222389407162203,
508
+ "grad_norm": 0.68359375,
509
+ "learning_rate": 4.7043903092683314e-05,
510
+ "logps_logqs/chosen": 0.07814273238182068,
511
+ "logqs/chosen": -0.8133522272109985,
512
+ "logqs/rejected": -0.9508693814277649,
513
+ "logqs_logps/rejected": 0.030778918415308,
514
+ "loss": 0.2104,
515
+ "step": 450
516
+ },
517
+ {
518
+ "epoch": 0.7382886949543586,
519
+ "grad_norm": 0.72265625,
520
+ "learning_rate": 4.6819811301717885e-05,
521
+ "logps_logqs/chosen": 0.07763786613941193,
522
+ "logqs/chosen": -0.8061367869377136,
523
+ "logqs/rejected": -0.9445575475692749,
524
+ "logqs_logps/rejected": 0.035373255610466,
525
+ "loss": 0.2084,
526
+ "step": 460
527
+ },
528
+ {
529
+ "epoch": 0.7543384491924967,
530
+ "grad_norm": 0.59765625,
531
+ "learning_rate": 4.6588107732509134e-05,
532
+ "logps_logqs/chosen": 0.07325359433889389,
533
+ "logqs/chosen": -0.7970572710037231,
534
+ "logqs/rejected": -0.9243482351303101,
535
+ "logqs_logps/rejected": 0.03834759443998337,
536
+ "loss": 0.2088,
537
+ "step": 470
538
+ },
539
+ {
540
+ "epoch": 0.770388203430635,
541
+ "grad_norm": 0.5625,
542
+ "learning_rate": 4.634887321417895e-05,
543
+ "logps_logqs/chosen": 0.0732722282409668,
544
+ "logqs/chosen": -0.7719672918319702,
545
+ "logqs/rejected": -0.9193938970565796,
546
+ "logqs_logps/rejected": 0.03625096380710602,
547
+ "loss": 0.2075,
548
+ "step": 480
549
+ },
550
+ {
551
+ "epoch": 0.7864379576687732,
552
+ "grad_norm": 0.90234375,
553
+ "learning_rate": 4.6102191202999065e-05,
554
+ "logps_logqs/chosen": 0.07290570437908173,
555
+ "logqs/chosen": -0.8092619180679321,
556
+ "logqs/rejected": -0.9642523527145386,
557
+ "logqs_logps/rejected": 0.03311945125460625,
558
+ "loss": 0.2045,
559
+ "step": 490
560
+ },
561
+ {
562
+ "epoch": 0.8024877119069114,
563
+ "grad_norm": 0.6015625,
564
+ "learning_rate": 4.5848147753277656e-05,
565
+ "logps_logqs/chosen": 0.07134760171175003,
566
+ "logqs/chosen": -0.7773372530937195,
567
+ "logqs/rejected": -0.9478441476821899,
568
+ "logqs_logps/rejected": 0.03295496851205826,
569
+ "loss": 0.2049,
570
+ "step": 500
571
+ },
572
+ {
573
+ "epoch": 0.8185374661450496,
574
+ "grad_norm": 0.5859375,
575
+ "learning_rate": 4.5586831487339485e-05,
576
+ "logps_logqs/chosen": 0.07219503819942474,
577
+ "logqs/chosen": -0.7950411438941956,
578
+ "logqs/rejected": -0.9455002546310425,
579
+ "logqs_logps/rejected": 0.03765324503183365,
580
+ "loss": 0.208,
581
+ "step": 510
582
+ },
583
+ {
584
+ "epoch": 0.8345872203831879,
585
+ "grad_norm": 0.62109375,
586
+ "learning_rate": 4.531833356461027e-05,
587
+ "logps_logqs/chosen": 0.06849464029073715,
588
+ "logqs/chosen": -0.7819440364837646,
589
+ "logqs/rejected": -0.9597524404525757,
590
+ "logqs_logps/rejected": 0.04056422412395477,
591
+ "loss": 0.2084,
592
+ "step": 520
593
+ },
594
+ {
595
+ "epoch": 0.8506369746213261,
596
+ "grad_norm": 0.6171875,
597
+ "learning_rate": 4.5042747649816006e-05,
598
+ "logps_logqs/chosen": 0.07160626351833344,
599
+ "logqs/chosen": -0.7721427083015442,
600
+ "logqs/rejected": -0.94866544008255,
601
+ "logqs_logps/rejected": 0.03749927878379822,
602
+ "loss": 0.2068,
603
+ "step": 530
604
+ },
605
+ {
606
+ "epoch": 0.8666867288594643,
607
+ "grad_norm": 0.5546875,
608
+ "learning_rate": 4.476016988030826e-05,
609
+ "logps_logqs/chosen": 0.07214485853910446,
610
+ "logqs/chosen": -0.7874671816825867,
611
+ "logqs/rejected": -0.9504098892211914,
612
+ "logqs_logps/rejected": 0.03507527709007263,
613
+ "loss": 0.2045,
614
+ "step": 540
615
+ },
616
+ {
617
+ "epoch": 0.8827364830976026,
618
+ "grad_norm": 0.54296875,
619
+ "learning_rate": 4.447069883252696e-05,
620
+ "logps_logqs/chosen": 0.07395409047603607,
621
+ "logqs/chosen": -0.771978497505188,
622
+ "logqs/rejected": -0.9129235148429871,
623
+ "logqs_logps/rejected": 0.039593033492565155,
624
+ "loss": 0.205,
625
+ "step": 550
626
+ },
627
+ {
628
+ "epoch": 0.8987862373357408,
629
+ "grad_norm": 0.61328125,
630
+ "learning_rate": 4.417443548761227e-05,
631
+ "logps_logqs/chosen": 0.07299650460481644,
632
+ "logqs/chosen": -0.7979342341423035,
633
+ "logqs/rejected": -0.9357426762580872,
634
+ "logqs_logps/rejected": 0.03937726467847824,
635
+ "loss": 0.2083,
636
+ "step": 560
637
+ },
638
+ {
639
+ "epoch": 0.9148359915738791,
640
+ "grad_norm": 0.57421875,
641
+ "learning_rate": 4.387148319617763e-05,
642
+ "logps_logqs/chosen": 0.06836996972560883,
643
+ "logqs/chosen": -0.7659951448440552,
644
+ "logqs/rejected": -0.8732549548149109,
645
+ "logqs_logps/rejected": 0.039081670343875885,
646
+ "loss": 0.203,
647
+ "step": 570
648
+ },
649
+ {
650
+ "epoch": 0.9308857458120172,
651
+ "grad_norm": 0.57421875,
652
+ "learning_rate": 4.356194764225618e-05,
653
+ "logps_logqs/chosen": 0.07397963851690292,
654
+ "logqs/chosen": -0.7891489863395691,
655
+ "logqs/rejected": -0.9294188618659973,
656
+ "logqs_logps/rejected": 0.03416060656309128,
657
+ "loss": 0.2024,
658
+ "step": 580
659
+ },
660
+ {
661
+ "epoch": 0.9469355000501555,
662
+ "grad_norm": 0.703125,
663
+ "learning_rate": 4.3245936806433205e-05,
664
+ "logps_logqs/chosen": 0.07466734945774078,
665
+ "logqs/chosen": -0.7958794832229614,
666
+ "logqs/rejected": -0.953458309173584,
667
+ "logqs_logps/rejected": 0.035235695540905,
668
+ "loss": 0.206,
669
+ "step": 590
670
+ },
671
+ {
672
+ "epoch": 0.9629852542882937,
673
+ "grad_norm": 0.5859375,
674
+ "learning_rate": 4.292356092817739e-05,
675
+ "logps_logqs/chosen": 0.07136549055576324,
676
+ "logqs/chosen": -0.7849777936935425,
677
+ "logqs/rejected": -0.9327009320259094,
678
+ "logqs_logps/rejected": 0.038332488387823105,
679
+ "loss": 0.2041,
680
+ "step": 600
681
+ },
682
+ {
683
+ "epoch": 0.979035008526432,
684
+ "grad_norm": 0.55859375,
685
+ "learning_rate": 4.259493246738409e-05,
686
+ "logps_logqs/chosen": 0.06976237148046494,
687
+ "logqs/chosen": -0.7658575773239136,
688
+ "logqs/rejected": -0.9027583003044128,
689
+ "logqs_logps/rejected": 0.03464614599943161,
690
+ "loss": 0.1993,
691
+ "step": 610
692
+ },
693
+ {
694
+ "epoch": 0.9950847627645701,
695
+ "grad_norm": 0.55078125,
696
+ "learning_rate": 4.226016606514411e-05,
697
+ "logps_logqs/chosen": 0.0706130638718605,
698
+ "logqs/chosen": -0.7847878932952881,
699
+ "logqs/rejected": -0.9509286880493164,
700
+ "logqs_logps/rejected": 0.03653167933225632,
701
+ "loss": 0.2035,
702
+ "step": 620
703
+ },
704
+ {
705
+ "epoch": 1.0111345170027084,
706
+ "grad_norm": 0.53515625,
707
+ "learning_rate": 4.191937850375136e-05,
708
+ "logps_logqs/chosen": 0.06913185119628906,
709
+ "logqs/chosen": -0.7918249368667603,
710
+ "logqs/rejected": -0.9458308219909668,
711
+ "logqs_logps/rejected": 0.030862990766763687,
712
+ "loss": 0.1886,
713
+ "step": 630
714
+ },
715
+ {
716
+ "epoch": 1.0271842712408465,
717
+ "grad_norm": 0.5625,
718
+ "learning_rate": 4.157268866596381e-05,
719
+ "logps_logqs/chosen": 0.0690259039402008,
720
+ "logqs/chosen": -0.7895857095718384,
721
+ "logqs/rejected": -0.9810531735420227,
722
+ "logqs_logps/rejected": 0.03111192025244236,
723
+ "loss": 0.187,
724
+ "step": 640
725
+ },
726
+ {
727
+ "epoch": 1.043234025478985,
728
+ "grad_norm": 0.53125,
729
+ "learning_rate": 4.1220217493531494e-05,
730
+ "logps_logqs/chosen": 0.06900795549154282,
731
+ "logqs/chosen": -0.7902609705924988,
732
+ "logqs/rejected": -0.9747546911239624,
733
+ "logqs_logps/rejected": 0.031710296869277954,
734
+ "loss": 0.1845,
735
+ "step": 650
736
+ },
737
+ {
738
+ "epoch": 1.059283779717123,
739
+ "grad_norm": 0.515625,
740
+ "learning_rate": 4.086208794500637e-05,
741
+ "logps_logqs/chosen": 0.07102300226688385,
742
+ "logqs/chosen": -0.7767165899276733,
743
+ "logqs/rejected": -0.9414197206497192,
744
+ "logqs_logps/rejected": 0.030909577384591103,
745
+ "loss": 0.1807,
746
+ "step": 660
747
+ },
748
+ {
749
+ "epoch": 1.0753335339552612,
750
+ "grad_norm": 0.494140625,
751
+ "learning_rate": 4.049842495284858e-05,
752
+ "logps_logqs/chosen": 0.06985093653202057,
753
+ "logqs/chosen": -0.7880310416221619,
754
+ "logqs/rejected": -0.9546613693237305,
755
+ "logqs_logps/rejected": 0.030261676758527756,
756
+ "loss": 0.186,
757
+ "step": 670
758
+ },
759
+ {
760
+ "epoch": 1.0913832881933996,
761
+ "grad_norm": 0.50390625,
762
+ "learning_rate": 4.012935537984414e-05,
763
+ "logps_logqs/chosen": 0.07058895379304886,
764
+ "logqs/chosen": -0.7935397624969482,
765
+ "logqs/rejected": -1.0117493867874146,
766
+ "logqs_logps/rejected": 0.029221097007393837,
767
+ "loss": 0.1835,
768
+ "step": 680
769
+ },
770
+ {
771
+ "epoch": 1.1074330424315377,
772
+ "grad_norm": 0.6171875,
773
+ "learning_rate": 3.9755007974849135e-05,
774
+ "logps_logqs/chosen": 0.06916572153568268,
775
+ "logqs/chosen": -0.7878638505935669,
776
+ "logqs/rejected": -0.9999720454216003,
777
+ "logqs_logps/rejected": 0.030744653195142746,
778
+ "loss": 0.1857,
779
+ "step": 690
780
+ },
781
+ {
782
+ "epoch": 1.123482796669676,
783
+ "grad_norm": 0.57421875,
784
+ "learning_rate": 3.93755133278762e-05,
785
+ "logps_logqs/chosen": 0.07014169543981552,
786
+ "logqs/chosen": -0.7882756590843201,
787
+ "logqs/rejected": -1.0001386404037476,
788
+ "logqs_logps/rejected": 0.02842717245221138,
789
+ "loss": 0.1832,
790
+ "step": 700
791
+ },
792
+ {
793
+ "epoch": 1.1395325509078142,
794
+ "grad_norm": 0.59375,
795
+ "learning_rate": 3.899100382453845e-05,
796
+ "logps_logqs/chosen": 0.0701603814959526,
797
+ "logqs/chosen": -0.7642520070075989,
798
+ "logqs/rejected": -0.9544415473937988,
799
+ "logqs_logps/rejected": 0.027573522180318832,
800
+ "loss": 0.1806,
801
+ "step": 710
802
+ },
803
+ {
804
+ "epoch": 1.1555823051459524,
805
+ "grad_norm": 0.50390625,
806
+ "learning_rate": 3.8601613599867156e-05,
807
+ "logps_logqs/chosen": 0.06890274584293365,
808
+ "logqs/chosen": -0.7708860635757446,
809
+ "logqs/rejected": -0.9537287950515747,
810
+ "logqs_logps/rejected": 0.028619807213544846,
811
+ "loss": 0.1804,
812
+ "step": 720
813
+ },
814
+ {
815
+ "epoch": 1.1716320593840908,
816
+ "grad_norm": 0.609375,
817
+ "learning_rate": 3.8207478491519216e-05,
818
+ "logps_logqs/chosen": 0.0682586207985878,
819
+ "logqs/chosen": -0.7696245908737183,
820
+ "logqs/rejected": -0.9157883524894714,
821
+ "logqs_logps/rejected": 0.028189942240715027,
822
+ "loss": 0.1814,
823
+ "step": 730
824
+ },
825
+ {
826
+ "epoch": 1.187681813622229,
827
+ "grad_norm": 0.59375,
828
+ "learning_rate": 3.780873599239044e-05,
829
+ "logps_logqs/chosen": 0.07058210670948029,
830
+ "logqs/chosen": -0.801421046257019,
831
+ "logqs/rejected": -0.9591751098632812,
832
+ "logqs_logps/rejected": 0.03063536249101162,
833
+ "loss": 0.1853,
834
+ "step": 740
835
+ },
836
+ {
837
+ "epoch": 1.203731567860367,
838
+ "grad_norm": 0.55859375,
839
+ "learning_rate": 3.740552520265167e-05,
840
+ "logps_logqs/chosen": 0.06886611133813858,
841
+ "logqs/chosen": -0.792157769203186,
842
+ "logqs/rejected": -0.9370824694633484,
843
+ "logqs_logps/rejected": 0.02984955906867981,
844
+ "loss": 0.1833,
845
+ "step": 750
846
+ },
847
+ {
848
+ "epoch": 1.2197813220985054,
849
+ "grad_norm": 0.55859375,
850
+ "learning_rate": 3.699798678122403e-05,
851
+ "logps_logqs/chosen": 0.07179007679224014,
852
+ "logqs/chosen": -0.7622597813606262,
853
+ "logqs/rejected": -0.9382703900337219,
854
+ "logqs_logps/rejected": 0.029157549142837524,
855
+ "loss": 0.1793,
856
+ "step": 760
857
+ },
858
+ {
859
+ "epoch": 1.2358310763366436,
860
+ "grad_norm": 0.546875,
861
+ "learning_rate": 3.6586262896710476e-05,
862
+ "logps_logqs/chosen": 0.06790535151958466,
863
+ "logqs/chosen": -0.7676440477371216,
864
+ "logqs/rejected": -0.928793728351593,
865
+ "logqs_logps/rejected": 0.03030979633331299,
866
+ "loss": 0.1808,
867
+ "step": 770
868
+ },
869
+ {
870
+ "epoch": 1.2518808305747817,
871
+ "grad_norm": 0.51171875,
872
+ "learning_rate": 3.61704971778007e-05,
873
+ "logps_logqs/chosen": 0.06736615300178528,
874
+ "logqs/chosen": -0.770586371421814,
875
+ "logqs/rejected": -0.9197053909301758,
876
+ "logqs_logps/rejected": 0.03245236724615097,
877
+ "loss": 0.1799,
878
+ "step": 780
879
+ },
880
+ {
881
+ "epoch": 1.26793058481292,
882
+ "grad_norm": 0.4765625,
883
+ "learning_rate": 3.575083466316664e-05,
884
+ "logps_logqs/chosen": 0.06956754624843597,
885
+ "logqs/chosen": -0.7774965167045593,
886
+ "logqs/rejected": -0.9702298045158386,
887
+ "logqs_logps/rejected": 0.030748773366212845,
888
+ "loss": 0.1831,
889
+ "step": 790
890
+ },
891
+ {
892
+ "epoch": 1.2839803390510582,
893
+ "grad_norm": 0.5078125,
894
+ "learning_rate": 3.532742175086621e-05,
895
+ "logps_logqs/chosen": 0.06920956075191498,
896
+ "logqs/chosen": -0.7709556818008423,
897
+ "logqs/rejected": -0.9387216567993164,
898
+ "logqs_logps/rejected": 0.027966167777776718,
899
+ "loss": 0.1788,
900
+ "step": 800
901
+ },
902
+ {
903
+ "epoch": 1.3000300932891964,
904
+ "grad_norm": 0.54296875,
905
+ "learning_rate": 3.490040614727272e-05,
906
+ "logps_logqs/chosen": 0.06927359104156494,
907
+ "logqs/chosen": -0.7772814035415649,
908
+ "logqs/rejected": -0.9691821932792664,
909
+ "logqs_logps/rejected": 0.028416061773896217,
910
+ "loss": 0.1781,
911
+ "step": 810
912
+ },
913
+ {
914
+ "epoch": 1.3160798475273348,
915
+ "grad_norm": 0.5078125,
916
+ "learning_rate": 3.446993681554797e-05,
917
+ "logps_logqs/chosen": 0.07202474772930145,
918
+ "logqs/chosen": -0.7855108380317688,
919
+ "logqs/rejected": -0.9708096385002136,
920
+ "logqs_logps/rejected": 0.030036652460694313,
921
+ "loss": 0.1793,
922
+ "step": 820
923
+ },
924
+ {
925
+ "epoch": 1.332129601765473,
926
+ "grad_norm": 0.65234375,
927
+ "learning_rate": 3.403616392367681e-05,
928
+ "logps_logqs/chosen": 0.07054628431797028,
929
+ "logqs/chosen": -0.7540086507797241,
930
+ "logqs/rejected": -0.9042137265205383,
931
+ "logqs_logps/rejected": 0.03296629339456558,
932
+ "loss": 0.1818,
933
+ "step": 830
934
+ },
935
+ {
936
+ "epoch": 1.3481793560036113,
937
+ "grad_norm": 0.5078125,
938
+ "learning_rate": 3.3599238792081575e-05,
939
+ "logps_logqs/chosen": 0.07161605358123779,
940
+ "logqs/chosen": -0.7662056684494019,
941
+ "logqs/rejected": -0.9423080682754517,
942
+ "logqs_logps/rejected": 0.031152984127402306,
943
+ "loss": 0.1786,
944
+ "step": 840
945
+ },
946
+ {
947
+ "epoch": 1.3642291102417494,
948
+ "grad_norm": 0.5390625,
949
+ "learning_rate": 3.315931384083431e-05,
950
+ "logps_logqs/chosen": 0.0672772079706192,
951
+ "logqs/chosen": -0.7698653936386108,
952
+ "logqs/rejected": -0.9608929753303528,
953
+ "logqs_logps/rejected": 0.031727343797683716,
954
+ "loss": 0.181,
955
+ "step": 850
956
+ },
957
+ {
958
+ "epoch": 1.3802788644798878,
959
+ "grad_norm": 0.5390625,
960
+ "learning_rate": 3.2716542536485474e-05,
961
+ "logps_logqs/chosen": 0.07064563035964966,
962
+ "logqs/chosen": -0.7671880125999451,
963
+ "logqs/rejected": -0.9258206486701965,
964
+ "logqs_logps/rejected": 0.03185782581567764,
965
+ "loss": 0.1802,
966
+ "step": 860
967
+ },
968
+ {
969
+ "epoch": 1.396328618718026,
970
+ "grad_norm": 0.59375,
971
+ "learning_rate": 3.2271079338527626e-05,
972
+ "logps_logqs/chosen": 0.07001273334026337,
973
+ "logqs/chosen": -0.7942059636116028,
974
+ "logqs/rejected": -0.9803145527839661,
975
+ "logqs_logps/rejected": 0.02900245226919651,
976
+ "loss": 0.1783,
977
+ "step": 870
978
+ },
979
+ {
980
+ "epoch": 1.412378372956164,
981
+ "grad_norm": 0.48828125,
982
+ "learning_rate": 3.1823079645512655e-05,
983
+ "logps_logqs/chosen": 0.06748739629983902,
984
+ "logqs/chosen": -0.7796913385391235,
985
+ "logqs/rejected": -1.0028189420700073,
986
+ "logqs_logps/rejected": 0.028837621212005615,
987
+ "loss": 0.1791,
988
+ "step": 880
989
+ },
990
+ {
991
+ "epoch": 1.4284281271943025,
992
+ "grad_norm": 0.5234375,
993
+ "learning_rate": 3.137269974084139e-05,
994
+ "logps_logqs/chosen": 0.07031874358654022,
995
+ "logqs/chosen": -0.7788208723068237,
996
+ "logqs/rejected": -0.9545127153396606,
997
+ "logqs_logps/rejected": 0.029628973454236984,
998
+ "loss": 0.179,
999
+ "step": 890
1000
+ },
1001
+ {
1002
+ "epoch": 1.4444778814324406,
1003
+ "grad_norm": 0.486328125,
1004
+ "learning_rate": 3.092009673824469e-05,
1005
+ "logps_logqs/chosen": 0.07582792639732361,
1006
+ "logqs/chosen": -0.8078464269638062,
1007
+ "logqs/rejected": -0.9633975028991699,
1008
+ "logqs_logps/rejected": 0.02939186617732048,
1009
+ "loss": 0.1885,
1010
+ "step": 900
1011
+ },
1012
+ {
1013
+ "epoch": 1.4605276356705787,
1014
+ "grad_norm": 0.53515625,
1015
+ "learning_rate": 3.0465428526974665e-05,
1016
+ "logps_logqs/chosen": 0.07225798070430756,
1017
+ "logqs/chosen": -0.7663235068321228,
1018
+ "logqs/rejected": -0.9817646741867065,
1019
+ "logqs_logps/rejected": 0.028154581785202026,
1020
+ "loss": 0.178,
1021
+ "step": 910
1022
+ },
1023
+ {
1024
+ "epoch": 1.4765773899087171,
1025
+ "grad_norm": 0.49609375,
1026
+ "learning_rate": 3.000885371672554e-05,
1027
+ "logps_logqs/chosen": 0.07049473375082016,
1028
+ "logqs/chosen": -0.7796770930290222,
1029
+ "logqs/rejected": -0.9203440546989441,
1030
+ "logqs_logps/rejected": 0.028858328238129616,
1031
+ "loss": 0.1765,
1032
+ "step": 920
1033
+ },
1034
+ {
1035
+ "epoch": 1.4926271441468553,
1036
+ "grad_norm": 0.53125,
1037
+ "learning_rate": 2.9550531582303082e-05,
1038
+ "logps_logqs/chosen": 0.07375530898571014,
1039
+ "logqs/chosen": -0.7871274352073669,
1040
+ "logqs/rejected": -0.950308620929718,
1041
+ "logqs_logps/rejected": 0.030121903866529465,
1042
+ "loss": 0.1821,
1043
+ "step": 930
1044
+ },
1045
+ {
1046
+ "epoch": 1.5086768983849934,
1047
+ "grad_norm": 0.462890625,
1048
+ "learning_rate": 2.909062200806208e-05,
1049
+ "logps_logqs/chosen": 0.06755580753087997,
1050
+ "logqs/chosen": -0.7797432541847229,
1051
+ "logqs/rejected": -0.9579198956489563,
1052
+ "logqs_logps/rejected": 0.027673590928316116,
1053
+ "loss": 0.1738,
1054
+ "step": 940
1055
+ },
1056
+ {
1057
+ "epoch": 1.5247266526231318,
1058
+ "grad_norm": 0.451171875,
1059
+ "learning_rate": 2.8629285432131083e-05,
1060
+ "logps_logqs/chosen": 0.06972555816173553,
1061
+ "logqs/chosen": -0.7843119502067566,
1062
+ "logqs/rejected": -0.971518874168396,
1063
+ "logqs_logps/rejected": 0.026922276243567467,
1064
+ "loss": 0.1771,
1065
+ "step": 950
1066
+ },
1067
+ {
1068
+ "epoch": 1.54077640686127,
1069
+ "grad_norm": 0.5234375,
1070
+ "learning_rate": 2.8166682790444116e-05,
1071
+ "logps_logqs/chosen": 0.0729864090681076,
1072
+ "logqs/chosen": -0.7914843559265137,
1073
+ "logqs/rejected": -0.9729417562484741,
1074
+ "logqs_logps/rejected": 0.02641429379582405,
1075
+ "loss": 0.1788,
1076
+ "step": 960
1077
+ },
1078
+ {
1079
+ "epoch": 1.556826161099408,
1080
+ "grad_norm": 0.4921875,
1081
+ "learning_rate": 2.7702975460598547e-05,
1082
+ "logps_logqs/chosen": 0.06970744580030441,
1083
+ "logqs/chosen": -0.7867680788040161,
1084
+ "logqs/rejected": -0.9702849388122559,
1085
+ "logqs_logps/rejected": 0.026300692930817604,
1086
+ "loss": 0.1753,
1087
+ "step": 970
1088
+ },
1089
+ {
1090
+ "epoch": 1.5728759153375464,
1091
+ "grad_norm": 0.51171875,
1092
+ "learning_rate": 2.723832520555905e-05,
1093
+ "logps_logqs/chosen": 0.06844428926706314,
1094
+ "logqs/chosen": -0.7796690464019775,
1095
+ "logqs/rejected": -0.9721530079841614,
1096
+ "logqs_logps/rejected": 0.03003394976258278,
1097
+ "loss": 0.1775,
1098
+ "step": 980
1099
+ },
1100
+ {
1101
+ "epoch": 1.5889256695756846,
1102
+ "grad_norm": 0.50390625,
1103
+ "learning_rate": 2.677289411722702e-05,
1104
+ "logps_logqs/chosen": 0.06915868073701859,
1105
+ "logqs/chosen": -0.7836161851882935,
1106
+ "logqs/rejected": -1.0066392421722412,
1107
+ "logqs_logps/rejected": 0.029805105179548264,
1108
+ "loss": 0.1834,
1109
+ "step": 990
1110
+ },
1111
+ {
1112
+ "epoch": 1.6049754238138227,
1113
+ "grad_norm": 0.52734375,
1114
+ "learning_rate": 2.63068445598953e-05,
1115
+ "logps_logqs/chosen": 0.07069100439548492,
1116
+ "logqs/chosen": -0.7810501456260681,
1117
+ "logqs/rejected": -0.9559534788131714,
1118
+ "logqs_logps/rejected": 0.02675846591591835,
1119
+ "loss": 0.1799,
1120
+ "step": 1000
1121
+ },
1122
+ {
1123
+ "epoch": 1.6210251780519611,
1124
+ "grad_norm": 0.455078125,
1125
+ "learning_rate": 2.5840339113607854e-05,
1126
+ "logps_logqs/chosen": 0.06957536935806274,
1127
+ "logqs/chosen": -0.776276171207428,
1128
+ "logqs/rejected": -0.9653003811836243,
1129
+ "logqs_logps/rejected": 0.029132988303899765,
1130
+ "loss": 0.1764,
1131
+ "step": 1010
1132
+ },
1133
+ {
1134
+ "epoch": 1.6370749322900993,
1135
+ "grad_norm": 0.4921875,
1136
+ "learning_rate": 2.53735405174442e-05,
1137
+ "logps_logqs/chosen": 0.06984798610210419,
1138
+ "logqs/chosen": -0.7904434204101562,
1139
+ "logqs/rejected": -0.9736318588256836,
1140
+ "logqs_logps/rejected": 0.02975938282907009,
1141
+ "loss": 0.1778,
1142
+ "step": 1020
1143
+ },
1144
+ {
1145
+ "epoch": 1.6531246865282374,
1146
+ "grad_norm": 0.50390625,
1147
+ "learning_rate": 2.490661161274835e-05,
1148
+ "logps_logqs/chosen": 0.06985798478126526,
1149
+ "logqs/chosen": -0.7864385843276978,
1150
+ "logqs/rejected": -0.9617294073104858,
1151
+ "logqs_logps/rejected": 0.027367640286684036,
1152
+ "loss": 0.1774,
1153
+ "step": 1030
1154
+ },
1155
+ {
1156
+ "epoch": 1.6691744407663758,
1157
+ "grad_norm": 0.4453125,
1158
+ "learning_rate": 2.443971528632205e-05,
1159
+ "logps_logqs/chosen": 0.07047738134860992,
1160
+ "logqs/chosen": -0.7980072498321533,
1161
+ "logqs/rejected": -0.9655786752700806,
1162
+ "logqs_logps/rejected": 0.028297554701566696,
1163
+ "loss": 0.1779,
1164
+ "step": 1040
1165
+ },
1166
+ {
1167
+ "epoch": 1.6852241950045141,
1168
+ "grad_norm": 0.515625,
1169
+ "learning_rate": 2.3973014413602238e-05,
1170
+ "logps_logqs/chosen": 0.07119600474834442,
1171
+ "logqs/chosen": -0.7848029732704163,
1172
+ "logqs/rejected": -1.003758192062378,
1173
+ "logqs_logps/rejected": 0.029630709439516068,
1174
+ "loss": 0.1808,
1175
+ "step": 1050
1176
+ },
1177
+ {
1178
+ "epoch": 1.701273949242652,
1179
+ "grad_norm": 0.498046875,
1180
+ "learning_rate": 2.3506671801842364e-05,
1181
+ "logps_logqs/chosen": 0.06773122400045395,
1182
+ "logqs/chosen": -0.8044145703315735,
1183
+ "logqs/rejected": -0.9743335843086243,
1184
+ "logqs_logps/rejected": 0.027200300246477127,
1185
+ "loss": 0.1769,
1186
+ "step": 1060
1187
+ },
1188
+ {
1189
+ "epoch": 1.7173237034807904,
1190
+ "grad_norm": 0.4921875,
1191
+ "learning_rate": 2.3040850133317597e-05,
1192
+ "logps_logqs/chosen": 0.07197652757167816,
1193
+ "logqs/chosen": -0.7732763886451721,
1194
+ "logqs/rejected": -0.9615533947944641,
1195
+ "logqs_logps/rejected": 0.02922072447836399,
1196
+ "loss": 0.1801,
1197
+ "step": 1070
1198
+ },
1199
+ {
1200
+ "epoch": 1.7333734577189288,
1201
+ "grad_norm": 0.48046875,
1202
+ "learning_rate": 2.2575711908573548e-05,
1203
+ "logps_logqs/chosen": 0.06941990554332733,
1204
+ "logqs/chosen": -0.7937701940536499,
1205
+ "logqs/rejected": -1.0073679685592651,
1206
+ "logqs_logps/rejected": 0.026350444182753563,
1207
+ "loss": 0.1764,
1208
+ "step": 1080
1209
+ },
1210
+ {
1211
+ "epoch": 1.749423211957067,
1212
+ "grad_norm": 0.447265625,
1213
+ "learning_rate": 2.2111419389738435e-05,
1214
+ "logps_logqs/chosen": 0.07039657980203629,
1215
+ "logqs/chosen": -0.7764157056808472,
1216
+ "logqs/rejected": -0.9544513821601868,
1217
+ "logqs_logps/rejected": 0.02772611379623413,
1218
+ "loss": 0.1755,
1219
+ "step": 1090
1220
+ },
1221
+ {
1222
+ "epoch": 1.765472966195205,
1223
+ "grad_norm": 0.5390625,
1224
+ "learning_rate": 2.1648134543918423e-05,
1225
+ "logps_logqs/chosen": 0.07292209565639496,
1226
+ "logqs/chosen": -0.7701107859611511,
1227
+ "logqs/rejected": -0.9618105888366699,
1228
+ "logqs_logps/rejected": 0.027484769001603127,
1229
+ "loss": 0.1762,
1230
+ "step": 1100
1231
+ },
1232
+ {
1233
+ "epoch": 1.7815227204333435,
1234
+ "grad_norm": 0.427734375,
1235
+ "learning_rate": 2.1186018986695842e-05,
1236
+ "logps_logqs/chosen": 0.07149146497249603,
1237
+ "logqs/chosen": -0.7753132581710815,
1238
+ "logqs/rejected": -0.926436722278595,
1239
+ "logqs_logps/rejected": 0.029845744371414185,
1240
+ "loss": 0.1779,
1241
+ "step": 1110
1242
+ },
1243
+ {
1244
+ "epoch": 1.7975724746714816,
1245
+ "grad_norm": 0.45703125,
1246
+ "learning_rate": 2.0725233925750063e-05,
1247
+ "logps_logqs/chosen": 0.06752609461545944,
1248
+ "logqs/chosen": -0.7796769738197327,
1249
+ "logqs/rejected": -0.9575828313827515,
1250
+ "logqs_logps/rejected": 0.03261734917759895,
1251
+ "loss": 0.1771,
1252
+ "step": 1120
1253
+ },
1254
+ {
1255
+ "epoch": 1.8136222289096198,
1256
+ "grad_norm": 0.54296875,
1257
+ "learning_rate": 2.026594010462068e-05,
1258
+ "logps_logqs/chosen": 0.07001613825559616,
1259
+ "logqs/chosen": -0.7918448448181152,
1260
+ "logqs/rejected": -0.9749159812927246,
1261
+ "logqs_logps/rejected": 0.028455784544348717,
1262
+ "loss": 0.1789,
1263
+ "step": 1130
1264
+ },
1265
+ {
1266
+ "epoch": 1.8296719831477581,
1267
+ "grad_norm": 0.478515625,
1268
+ "learning_rate": 1.980829774663256e-05,
1269
+ "logps_logqs/chosen": 0.07289810478687286,
1270
+ "logqs/chosen": -0.7838630676269531,
1271
+ "logqs/rejected": -0.9698160290718079,
1272
+ "logqs_logps/rejected": 0.02906595729291439,
1273
+ "loss": 0.1794,
1274
+ "step": 1140
1275
+ },
1276
+ {
1277
+ "epoch": 1.8457217373858963,
1278
+ "grad_norm": 0.458984375,
1279
+ "learning_rate": 1.9352466499002422e-05,
1280
+ "logps_logqs/chosen": 0.0761161744594574,
1281
+ "logqs/chosen": -0.7819662094116211,
1282
+ "logqs/rejected": -0.9463040232658386,
1283
+ "logqs_logps/rejected": 0.028005197644233704,
1284
+ "loss": 0.183,
1285
+ "step": 1150
1286
+ },
1287
+ {
1288
+ "epoch": 1.8617714916240344,
1289
+ "grad_norm": 0.474609375,
1290
+ "learning_rate": 1.8898605377146383e-05,
1291
+ "logps_logqs/chosen": 0.06957074254751205,
1292
+ "logqs/chosen": -0.7857328057289124,
1293
+ "logqs/rejected": -0.9639459848403931,
1294
+ "logqs_logps/rejected": 0.027649903669953346,
1295
+ "loss": 0.1753,
1296
+ "step": 1160
1297
+ },
1298
+ {
1299
+ "epoch": 1.8778212458621728,
1300
+ "grad_norm": 0.48046875,
1301
+ "learning_rate": 1.8446872709207847e-05,
1302
+ "logps_logqs/chosen": 0.07362545281648636,
1303
+ "logqs/chosen": -0.7835830450057983,
1304
+ "logqs/rejected": -1.0045692920684814,
1305
+ "logqs_logps/rejected": 0.027295967563986778,
1306
+ "loss": 0.1806,
1307
+ "step": 1170
1308
+ },
1309
+ {
1310
+ "epoch": 1.893871000100311,
1311
+ "grad_norm": 0.51171875,
1312
+ "learning_rate": 1.799742608082519e-05,
1313
+ "logps_logqs/chosen": 0.07236044853925705,
1314
+ "logqs/chosen": -0.7885088324546814,
1315
+ "logqs/rejected": -0.9487521052360535,
1316
+ "logqs_logps/rejected": 0.026073191314935684,
1317
+ "loss": 0.1723,
1318
+ "step": 1180
1319
+ },
1320
+ {
1321
+ "epoch": 1.909920754338449,
1322
+ "grad_norm": 0.51171875,
1323
+ "learning_rate": 1.7550422280158513e-05,
1324
+ "logps_logqs/chosen": 0.0707259327173233,
1325
+ "logqs/chosen": -0.7704340815544128,
1326
+ "logqs/rejected": -0.9094502329826355,
1327
+ "logqs_logps/rejected": 0.027560651302337646,
1328
+ "loss": 0.1758,
1329
+ "step": 1190
1330
+ },
1331
+ {
1332
+ "epoch": 1.9259705085765875,
1333
+ "grad_norm": 0.435546875,
1334
+ "learning_rate": 1.7106017243194487e-05,
1335
+ "logps_logqs/chosen": 0.07020822167396545,
1336
+ "logqs/chosen": -0.7692683935165405,
1337
+ "logqs/rejected": -0.9656769037246704,
1338
+ "logqs_logps/rejected": 0.02792223170399666,
1339
+ "loss": 0.177,
1340
+ "step": 1200
1341
+ },
1342
+ {
1343
+ "epoch": 1.9420202628147256,
1344
+ "grad_norm": 0.53515625,
1345
+ "learning_rate": 1.6664365999348594e-05,
1346
+ "logps_logqs/chosen": 0.06943775713443756,
1347
+ "logqs/chosen": -0.7688643932342529,
1348
+ "logqs/rejected": -0.9713398218154907,
1349
+ "logqs_logps/rejected": 0.026859009638428688,
1350
+ "loss": 0.1748,
1351
+ "step": 1210
1352
+ },
1353
+ {
1354
+ "epoch": 1.9580700170528638,
1355
+ "grad_norm": 0.51953125,
1356
+ "learning_rate": 1.6225622617383494e-05,
1357
+ "logps_logqs/chosen": 0.07070201635360718,
1358
+ "logqs/chosen": -0.7839328050613403,
1359
+ "logqs/rejected": -0.9712308645248413,
1360
+ "logqs_logps/rejected": 0.028167420998215675,
1361
+ "loss": 0.1763,
1362
+ "step": 1220
1363
+ },
1364
+ {
1365
+ "epoch": 1.9741197712910021,
1366
+ "grad_norm": 0.4453125,
1367
+ "learning_rate": 1.578994015166263e-05,
1368
+ "logps_logqs/chosen": 0.07289667427539825,
1369
+ "logqs/chosen": -0.7838398814201355,
1370
+ "logqs/rejected": -0.9522368311882019,
1371
+ "logqs_logps/rejected": 0.02768387272953987,
1372
+ "loss": 0.1773,
1373
+ "step": 1230
1374
+ },
1375
+ {
1376
+ "epoch": 1.9901695255291403,
1377
+ "grad_norm": 0.53515625,
1378
+ "learning_rate": 1.535747058875765e-05,
1379
+ "logps_logqs/chosen": 0.06881529092788696,
1380
+ "logqs/chosen": -0.790834903717041,
1381
+ "logqs/rejected": -0.9497320055961609,
1382
+ "logqs_logps/rejected": 0.029328888282179832,
1383
+ "loss": 0.1734,
1384
+ "step": 1240
1385
+ },
1386
+ {
1387
+ "epoch": 2.0062192797672784,
1388
+ "grad_norm": 0.43359375,
1389
+ "learning_rate": 1.4928364794428307e-05,
1390
+ "logps_logqs/chosen": 0.07006263732910156,
1391
+ "logqs/chosen": -0.799282431602478,
1392
+ "logqs/rejected": -1.0095001459121704,
1393
+ "logqs_logps/rejected": 0.02767905592918396,
1394
+ "loss": 0.1732,
1395
+ "step": 1250
1396
+ },
1397
+ {
1398
+ "epoch": 2.022269034005417,
1399
+ "grad_norm": 0.4296875,
1400
+ "learning_rate": 1.4502772460993385e-05,
1401
+ "logps_logqs/chosen": 0.06896007061004639,
1402
+ "logqs/chosen": -0.7676675319671631,
1403
+ "logqs/rejected": -0.9425755739212036,
1404
+ "logqs_logps/rejected": 0.02848168835043907,
1405
+ "loss": 0.1685,
1406
+ "step": 1260
1407
+ },
1408
+ {
1409
+ "epoch": 2.038318788243555,
1410
+ "grad_norm": 0.462890625,
1411
+ "learning_rate": 1.4080842055110993e-05,
1412
+ "logps_logqs/chosen": 0.06711134314537048,
1413
+ "logqs/chosen": -0.7576900720596313,
1414
+ "logqs/rejected": -0.9405368566513062,
1415
+ "logqs_logps/rejected": 0.02635120414197445,
1416
+ "loss": 0.1689,
1417
+ "step": 1270
1418
+ },
1419
+ {
1420
+ "epoch": 2.054368542481693,
1421
+ "grad_norm": 0.46484375,
1422
+ "learning_rate": 1.3662720765986341e-05,
1423
+ "logps_logqs/chosen": 0.06902097165584564,
1424
+ "logqs/chosen": -0.788707435131073,
1425
+ "logqs/rejected": -0.9549592733383179,
1426
+ "logqs_logps/rejected": 0.028454547747969627,
1427
+ "loss": 0.1686,
1428
+ "step": 1280
1429
+ },
1430
+ {
1431
+ "epoch": 2.0704182967198315,
1432
+ "grad_norm": 0.40625,
1433
+ "learning_rate": 1.3248554454025275e-05,
1434
+ "logps_logqs/chosen": 0.07042928040027618,
1435
+ "logqs/chosen": -0.7786656618118286,
1436
+ "logqs/rejected": -0.9685086011886597,
1437
+ "logqs_logps/rejected": 0.025051862001419067,
1438
+ "loss": 0.1673,
1439
+ "step": 1290
1440
+ },
1441
+ {
1442
+ "epoch": 2.08646805095797,
1443
+ "grad_norm": 0.447265625,
1444
+ "learning_rate": 1.2838487599951243e-05,
1445
+ "logps_logqs/chosen": 0.06794705986976624,
1446
+ "logqs/chosen": -0.7671617269515991,
1447
+ "logqs/rejected": -0.9481566548347473,
1448
+ "logqs_logps/rejected": 0.025887325406074524,
1449
+ "loss": 0.1696,
1450
+ "step": 1300
1451
+ },
1452
+ {
1453
+ "epoch": 2.1025178051961078,
1454
+ "grad_norm": 0.439453125,
1455
+ "learning_rate": 1.2432663254403638e-05,
1456
+ "logps_logqs/chosen": 0.06914719194173813,
1457
+ "logqs/chosen": -0.8097491264343262,
1458
+ "logqs/rejected": -0.9609512090682983,
1459
+ "logqs_logps/rejected": 0.025332655757665634,
1460
+ "loss": 0.1689,
1461
+ "step": 1310
1462
+ },
1463
+ {
1464
+ "epoch": 2.118567559434246,
1465
+ "grad_norm": 0.421875,
1466
+ "learning_rate": 1.2031222988034967e-05,
1467
+ "logps_logqs/chosen": 0.06850691139698029,
1468
+ "logqs/chosen": -0.7866081595420837,
1469
+ "logqs/rejected": -0.952921986579895,
1470
+ "logqs_logps/rejected": 0.02708747610449791,
1471
+ "loss": 0.1672,
1472
+ "step": 1320
1473
+ },
1474
+ {
1475
+ "epoch": 2.1346173136723845,
1476
+ "grad_norm": 0.45703125,
1477
+ "learning_rate": 1.1634306842124423e-05,
1478
+ "logps_logqs/chosen": 0.06721793115139008,
1479
+ "logqs/chosen": -0.7912784218788147,
1480
+ "logqs/rejected": -0.9742089509963989,
1481
+ "logqs_logps/rejected": 0.024986112490296364,
1482
+ "loss": 0.1668,
1483
+ "step": 1330
1484
+ },
1485
+ {
1486
+ "epoch": 2.1506670679105224,
1487
+ "grad_norm": 0.451171875,
1488
+ "learning_rate": 1.1242053279724762e-05,
1489
+ "logps_logqs/chosen": 0.06856787204742432,
1490
+ "logqs/chosen": -0.7681561708450317,
1491
+ "logqs/rejected": -0.9738146066665649,
1492
+ "logqs_logps/rejected": 0.025526920333504677,
1493
+ "loss": 0.1676,
1494
+ "step": 1340
1495
+ },
1496
+ {
1497
+ "epoch": 2.166716822148661,
1498
+ "grad_norm": 0.44140625,
1499
+ "learning_rate": 1.0854599137359953e-05,
1500
+ "logps_logqs/chosen": 0.07144749909639359,
1501
+ "logqs/chosen": -0.7849553823471069,
1502
+ "logqs/rejected": -0.9945628046989441,
1503
+ "logqs_logps/rejected": 0.02589735947549343,
1504
+ "loss": 0.1683,
1505
+ "step": 1350
1506
+ },
1507
+ {
1508
+ "epoch": 2.182766576386799,
1509
+ "grad_norm": 0.3984375,
1510
+ "learning_rate": 1.0472079577290111e-05,
1511
+ "logps_logqs/chosen": 0.06926636397838593,
1512
+ "logqs/chosen": -0.780665934085846,
1513
+ "logqs/rejected": -0.9741228222846985,
1514
+ "logqs_logps/rejected": 0.0255129374563694,
1515
+ "loss": 0.1668,
1516
+ "step": 1360
1517
+ },
1518
+ {
1519
+ "epoch": 2.198816330624937,
1520
+ "grad_norm": 0.431640625,
1521
+ "learning_rate": 1.009462804036059e-05,
1522
+ "logps_logqs/chosen": 0.06916262209415436,
1523
+ "logqs/chosen": -0.7870660424232483,
1524
+ "logqs/rejected": -0.9848629236221313,
1525
+ "logqs_logps/rejected": 0.024935439229011536,
1526
+ "loss": 0.1688,
1527
+ "step": 1370
1528
+ },
1529
+ {
1530
+ "epoch": 2.2148660848630755,
1531
+ "grad_norm": 0.48046875,
1532
+ "learning_rate": 9.722376199451436e-06,
1533
+ "logps_logqs/chosen": 0.06875228136777878,
1534
+ "logqs/chosen": -0.7785555124282837,
1535
+ "logqs/rejected": -0.9765084981918335,
1536
+ "logqs_logps/rejected": 0.02562164142727852,
1537
+ "loss": 0.1677,
1538
+ "step": 1380
1539
+ },
1540
+ {
1541
+ "epoch": 2.230915839101214,
1542
+ "grad_norm": 0.40234375,
1543
+ "learning_rate": 9.35545391354378e-06,
1544
+ "logps_logqs/chosen": 0.06958961486816406,
1545
+ "logqs/chosen": -0.7699087262153625,
1546
+ "logqs/rejected": -0.9525764584541321,
1547
+ "logqs_logps/rejected": 0.027162248268723488,
1548
+ "loss": 0.1672,
1549
+ "step": 1390
1550
+ },
1551
+ {
1552
+ "epoch": 2.246965593339352,
1553
+ "grad_norm": 0.416015625,
1554
+ "learning_rate": 8.993989182418824e-06,
1555
+ "logps_logqs/chosen": 0.0719502717256546,
1556
+ "logqs/chosen": -0.7669367790222168,
1557
+ "logqs/rejected": -0.9486163258552551,
1558
+ "logqs_logps/rejected": 0.02480030246078968,
1559
+ "loss": 0.1683,
1560
+ "step": 1400
1561
+ },
1562
+ {
1563
+ "epoch": 2.26301534757749,
1564
+ "grad_norm": 0.48046875,
1565
+ "learning_rate": 8.63810810200556e-06,
1566
+ "logps_logqs/chosen": 0.06904618442058563,
1567
+ "logqs/chosen": -0.7622929811477661,
1568
+ "logqs/rejected": -0.9743655920028687,
1569
+ "logqs_logps/rejected": 0.026619747281074524,
1570
+ "loss": 0.1678,
1571
+ "step": 1410
1572
+ },
1573
+ {
1574
+ "epoch": 2.2790651018156285,
1575
+ "grad_norm": 0.40234375,
1576
+ "learning_rate": 8.287934820392498e-06,
1577
+ "logps_logqs/chosen": 0.06923404335975647,
1578
+ "logqs/chosen": -0.7849544286727905,
1579
+ "logqs/rejected": -0.9630452990531921,
1580
+ "logqs_logps/rejected": 0.023480530828237534,
1581
+ "loss": 0.1651,
1582
+ "step": 1420
1583
+ },
1584
+ {
1585
+ "epoch": 2.295114856053767,
1586
+ "grad_norm": 0.4140625,
1587
+ "learning_rate": 7.943591494519016e-06,
1588
+ "logps_logqs/chosen": 0.06750839948654175,
1589
+ "logqs/chosen": -0.764995276927948,
1590
+ "logqs/rejected": -0.9358587265014648,
1591
+ "logqs_logps/rejected": 0.027208849787712097,
1592
+ "loss": 0.1667,
1593
+ "step": 1430
1594
+ },
1595
+ {
1596
+ "epoch": 2.311164610291905,
1597
+ "grad_norm": 0.40625,
1598
+ "learning_rate": 7.60519824756124e-06,
1599
+ "logps_logqs/chosen": 0.06875176727771759,
1600
+ "logqs/chosen": -0.8004514575004578,
1601
+ "logqs/rejected": -1.0356009006500244,
1602
+ "logqs_logps/rejected": 0.0247772429138422,
1603
+ "loss": 0.1681,
1604
+ "step": 1440
1605
+ },
1606
+ {
1607
+ "epoch": 2.327214364530043,
1608
+ "grad_norm": 0.41015625,
1609
+ "learning_rate": 7.272873127027449e-06,
1610
+ "logps_logqs/chosen": 0.06980612874031067,
1611
+ "logqs/chosen": -0.7612735033035278,
1612
+ "logqs/rejected": -0.9591197967529297,
1613
+ "logqs_logps/rejected": 0.0231007132679224,
1614
+ "loss": 0.1652,
1615
+ "step": 1450
1616
+ },
1617
+ {
1618
+ "epoch": 2.3432641187681815,
1619
+ "grad_norm": 0.41796875,
1620
+ "learning_rate": 6.946732063577488e-06,
1621
+ "logps_logqs/chosen": 0.0690704956650734,
1622
+ "logqs/chosen": -0.7698851823806763,
1623
+ "logqs/rejected": -0.949694037437439,
1624
+ "logqs_logps/rejected": 0.023757826536893845,
1625
+ "loss": 0.1655,
1626
+ "step": 1460
1627
+ },
1628
+ {
1629
+ "epoch": 2.3593138730063195,
1630
+ "grad_norm": 0.4375,
1631
+ "learning_rate": 6.6268888305807296e-06,
1632
+ "logps_logqs/chosen": 0.06597896665334702,
1633
+ "logqs/chosen": -0.7630634903907776,
1634
+ "logqs/rejected": -0.9974457025527954,
1635
+ "logqs_logps/rejected": 0.02252907119691372,
1636
+ "loss": 0.1648,
1637
+ "step": 1470
1638
+ },
1639
+ {
1640
+ "epoch": 2.375363627244458,
1641
+ "grad_norm": 0.404296875,
1642
+ "learning_rate": 6.313455004426577e-06,
1643
+ "logps_logqs/chosen": 0.06898193061351776,
1644
+ "logqs/chosen": -0.77290940284729,
1645
+ "logqs/rejected": -0.9675156474113464,
1646
+ "logqs_logps/rejected": 0.024294773116707802,
1647
+ "loss": 0.166,
1648
+ "step": 1480
1649
+ },
1650
+ {
1651
+ "epoch": 2.391413381482596,
1652
+ "grad_norm": 0.443359375,
1653
+ "learning_rate": 6.0065399256013204e-06,
1654
+ "logps_logqs/chosen": 0.0698733851313591,
1655
+ "logqs/chosen": -0.761997401714325,
1656
+ "logqs/rejected": -0.9315551519393921,
1657
+ "logqs_logps/rejected": 0.02528567612171173,
1658
+ "loss": 0.1696,
1659
+ "step": 1490
1660
+ },
1661
+ {
1662
+ "epoch": 2.407463135720734,
1663
+ "grad_norm": 0.423828125,
1664
+ "learning_rate": 5.7062506605450454e-06,
1665
+ "logps_logqs/chosen": 0.06849467009305954,
1666
+ "logqs/chosen": -0.7781127095222473,
1667
+ "logqs/rejected": -0.9850066304206848,
1668
+ "logqs_logps/rejected": 0.026143735274672508,
1669
+ "loss": 0.1703,
1670
+ "step": 1500
1671
+ },
1672
+ {
1673
+ "epoch": 2.4235128899588725,
1674
+ "grad_norm": 0.443359375,
1675
+ "learning_rate": 5.412691964301827e-06,
1676
+ "logps_logqs/chosen": 0.06971971690654755,
1677
+ "logqs/chosen": -0.7925983667373657,
1678
+ "logqs/rejected": -0.9926835894584656,
1679
+ "logqs_logps/rejected": 0.025326719507575035,
1680
+ "loss": 0.1685,
1681
+ "step": 1510
1682
+ },
1683
+ {
1684
+ "epoch": 2.439562644197011,
1685
+ "grad_norm": 0.470703125,
1686
+ "learning_rate": 5.125966243976218e-06,
1687
+ "logps_logqs/chosen": 0.07100898772478104,
1688
+ "logqs/chosen": -0.7900832891464233,
1689
+ "logqs/rejected": -0.9629155993461609,
1690
+ "logqs_logps/rejected": 0.025052938610315323,
1691
+ "loss": 0.1693,
1692
+ "step": 1520
1693
+ },
1694
+ {
1695
+ "epoch": 2.455612398435149,
1696
+ "grad_norm": 0.453125,
1697
+ "learning_rate": 4.846173523008824e-06,
1698
+ "logps_logqs/chosen": 0.06859283149242401,
1699
+ "logqs/chosen": -0.7894052267074585,
1700
+ "logqs/rejected": -0.9897419810295105,
1701
+ "logqs_logps/rejected": 0.02611861191689968,
1702
+ "loss": 0.1683,
1703
+ "step": 1530
1704
+ },
1705
+ {
1706
+ "epoch": 2.471662152673287,
1707
+ "grad_norm": 0.439453125,
1708
+ "learning_rate": 4.573411406283409e-06,
1709
+ "logps_logqs/chosen": 0.07065166532993317,
1710
+ "logqs/chosen": -0.8073331117630005,
1711
+ "logqs/rejected": -0.9767266511917114,
1712
+ "logqs_logps/rejected": 0.0274626724421978,
1713
+ "loss": 0.1756,
1714
+ "step": 1540
1715
+ },
1716
+ {
1717
+ "epoch": 2.4877119069114255,
1718
+ "grad_norm": 0.44140625,
1719
+ "learning_rate": 4.307775046077739e-06,
1720
+ "logps_logqs/chosen": 0.0715617686510086,
1721
+ "logqs/chosen": -0.8018879890441895,
1722
+ "logqs/rejected": -0.9990041851997375,
1723
+ "logqs_logps/rejected": 0.026406193152070045,
1724
+ "loss": 0.1719,
1725
+ "step": 1550
1726
+ },
1727
+ {
1728
+ "epoch": 2.5037616611495634,
1729
+ "grad_norm": 0.46875,
1730
+ "learning_rate": 4.049357108869964e-06,
1731
+ "logps_logqs/chosen": 0.06756819784641266,
1732
+ "logqs/chosen": -0.7610915899276733,
1733
+ "logqs/rejected": -0.972091794013977,
1734
+ "logqs_logps/rejected": 0.026423901319503784,
1735
+ "loss": 0.1683,
1736
+ "step": 1560
1737
+ },
1738
+ {
1739
+ "epoch": 2.519811415387702,
1740
+ "grad_norm": 0.439453125,
1741
+ "learning_rate": 3.798247743012201e-06,
1742
+ "logps_logqs/chosen": 0.06909768283367157,
1743
+ "logqs/chosen": -0.7960779666900635,
1744
+ "logqs/rejected": -0.9779514074325562,
1745
+ "logqs_logps/rejected": 0.0239783376455307,
1746
+ "loss": 0.1662,
1747
+ "step": 1570
1748
+ },
1749
+ {
1750
+ "epoch": 2.53586116962584,
1751
+ "grad_norm": 0.43359375,
1752
+ "learning_rate": 3.554534547282512e-06,
1753
+ "logps_logqs/chosen": 0.07034210860729218,
1754
+ "logqs/chosen": -0.7767470479011536,
1755
+ "logqs/rejected": -0.9516876339912415,
1756
+ "logqs_logps/rejected": 0.028661763295531273,
1757
+ "loss": 0.17,
1758
+ "step": 1580
1759
+ },
1760
+ {
1761
+ "epoch": 2.5519109238639786,
1762
+ "grad_norm": 0.404296875,
1763
+ "learning_rate": 3.318302540326343e-06,
1764
+ "logps_logqs/chosen": 0.07079549133777618,
1765
+ "logqs/chosen": -0.777116596698761,
1766
+ "logqs/rejected": -0.9669076204299927,
1767
+ "logqs_logps/rejected": 0.026164010167121887,
1768
+ "loss": 0.1723,
1769
+ "step": 1590
1770
+ },
1771
+ {
1772
+ "epoch": 2.5679606781021165,
1773
+ "grad_norm": 0.431640625,
1774
+ "learning_rate": 3.089634130998026e-06,
1775
+ "logps_logqs/chosen": 0.06778942793607712,
1776
+ "logqs/chosen": -0.7698632478713989,
1777
+ "logqs/rejected": -0.9828590154647827,
1778
+ "logqs_logps/rejected": 0.02352612093091011,
1779
+ "loss": 0.166,
1780
+ "step": 1600
1781
+ },
1782
+ {
1783
+ "epoch": 2.584010432340255,
1784
+ "grad_norm": 0.4296875,
1785
+ "learning_rate": 2.8686090896126587e-06,
1786
+ "logps_logqs/chosen": 0.06593993306159973,
1787
+ "logqs/chosen": -0.7838481664657593,
1788
+ "logqs/rejected": -0.9900785684585571,
1789
+ "logqs_logps/rejected": 0.023241404443979263,
1790
+ "loss": 0.1633,
1791
+ "step": 1610
1792
+ },
1793
+ {
1794
+ "epoch": 2.6000601865783928,
1795
+ "grad_norm": 0.439453125,
1796
+ "learning_rate": 2.655304520118482e-06,
1797
+ "logps_logqs/chosen": 0.0693887323141098,
1798
+ "logqs/chosen": -0.778706431388855,
1799
+ "logqs/rejected": -0.9755508303642273,
1800
+ "logqs_logps/rejected": 0.02637295424938202,
1801
+ "loss": 0.1664,
1802
+ "step": 1620
1803
+ },
1804
+ {
1805
+ "epoch": 2.616109940816531,
1806
+ "grad_norm": 0.44140625,
1807
+ "learning_rate": 2.44979483319939e-06,
1808
+ "logps_logqs/chosen": 0.06880888342857361,
1809
+ "logqs/chosen": -0.7670097351074219,
1810
+ "logqs/rejected": -1.0248219966888428,
1811
+ "logqs_logps/rejected": 0.025341719388961792,
1812
+ "loss": 0.1667,
1813
+ "step": 1630
1814
+ },
1815
+ {
1816
+ "epoch": 2.6321596950546695,
1817
+ "grad_norm": 0.44140625,
1818
+ "learning_rate": 2.252151720316964e-06,
1819
+ "logps_logqs/chosen": 0.0697592943906784,
1820
+ "logqs/chosen": -0.7662806510925293,
1821
+ "logqs/rejected": -0.9526630640029907,
1822
+ "logqs_logps/rejected": 0.024268481880426407,
1823
+ "loss": 0.1672,
1824
+ "step": 1640
1825
+ },
1826
+ {
1827
+ "epoch": 2.648209449292808,
1828
+ "grad_norm": 0.458984375,
1829
+ "learning_rate": 2.0624441287011213e-06,
1830
+ "logps_logqs/chosen": 0.06959348171949387,
1831
+ "logqs/chosen": -0.7930929064750671,
1832
+ "logqs/rejected": -0.9900212287902832,
1833
+ "logqs_logps/rejected": 0.02552485466003418,
1834
+ "loss": 0.1705,
1835
+ "step": 1650
1836
+ },
1837
+ {
1838
+ "epoch": 2.664259203530946,
1839
+ "grad_norm": 0.419921875,
1840
+ "learning_rate": 1.8807382372980687e-06,
1841
+ "logps_logqs/chosen": 0.06878109276294708,
1842
+ "logqs/chosen": -0.776373565196991,
1843
+ "logqs/rejected": -0.9679840207099915,
1844
+ "logqs_logps/rejected": 0.024887990206480026,
1845
+ "loss": 0.1656,
1846
+ "step": 1660
1847
+ },
1848
+ {
1849
+ "epoch": 2.680308957769084,
1850
+ "grad_norm": 0.431640625,
1851
+ "learning_rate": 1.7070974336839796e-06,
1852
+ "logps_logqs/chosen": 0.06730766594409943,
1853
+ "logqs/chosen": -0.7652665972709656,
1854
+ "logqs/rejected": -0.9662426114082336,
1855
+ "logqs_logps/rejected": 0.026059061288833618,
1856
+ "loss": 0.1667,
1857
+ "step": 1670
1858
+ },
1859
+ {
1860
+ "epoch": 2.6963587120072225,
1861
+ "grad_norm": 0.453125,
1862
+ "learning_rate": 1.541582291952401e-06,
1863
+ "logps_logqs/chosen": 0.06985867768526077,
1864
+ "logqs/chosen": -0.7848519086837769,
1865
+ "logqs/rejected": -0.9851476550102234,
1866
+ "logqs_logps/rejected": 0.02311038039624691,
1867
+ "loss": 0.1688,
1868
+ "step": 1680
1869
+ },
1870
+ {
1871
+ "epoch": 2.7124084662453605,
1872
+ "grad_norm": 0.4375,
1873
+ "learning_rate": 1.38425055158318e-06,
1874
+ "logps_logqs/chosen": 0.0703495591878891,
1875
+ "logqs/chosen": -0.7814745903015137,
1876
+ "logqs/rejected": -0.9425519108772278,
1877
+ "logqs_logps/rejected": 0.026310011744499207,
1878
+ "loss": 0.1639,
1879
+ "step": 1690
1880
+ },
1881
+ {
1882
+ "epoch": 2.728458220483499,
1883
+ "grad_norm": 0.44921875,
1884
+ "learning_rate": 1.235157097300188e-06,
1885
+ "logps_logqs/chosen": 0.06932912766933441,
1886
+ "logqs/chosen": -0.7783406972885132,
1887
+ "logqs/rejected": -0.9719565510749817,
1888
+ "logqs_logps/rejected": 0.025869470089673996,
1889
+ "loss": 0.1671,
1890
+ "step": 1700
1891
+ },
1892
+ {
1893
+ "epoch": 2.744507974721637,
1894
+ "grad_norm": 0.40625,
1895
+ "learning_rate": 1.0943539399249635e-06,
1896
+ "logps_logqs/chosen": 0.07044648379087448,
1897
+ "logqs/chosen": -0.776070237159729,
1898
+ "logqs/rejected": -0.9304519891738892,
1899
+ "logqs_logps/rejected": 0.02800569497048855,
1900
+ "loss": 0.169,
1901
+ "step": 1710
1902
+ },
1903
+ {
1904
+ "epoch": 2.7605577289597756,
1905
+ "grad_norm": 0.4609375,
1906
+ "learning_rate": 9.618901982328704e-07,
1907
+ "logps_logqs/chosen": 0.06949154287576675,
1908
+ "logqs/chosen": -0.7787348031997681,
1909
+ "logqs/rejected": -0.9538747668266296,
1910
+ "logqs_logps/rejected": 0.027285417541861534,
1911
+ "loss": 0.169,
1912
+ "step": 1720
1913
+ },
1914
+ {
1915
+ "epoch": 2.7766074831979135,
1916
+ "grad_norm": 0.427734375,
1917
+ "learning_rate": 8.378120818181707e-07,
1918
+ "logps_logqs/chosen": 0.07005327194929123,
1919
+ "logqs/chosen": -0.7800394296646118,
1920
+ "logqs/rejected": -0.9388517141342163,
1921
+ "logqs_logps/rejected": 0.024246862158179283,
1922
+ "loss": 0.1638,
1923
+ "step": 1730
1924
+ },
1925
+ {
1926
+ "epoch": 2.792657237436052,
1927
+ "grad_norm": 0.39453125,
1928
+ "learning_rate": 7.221628749739223e-07,
1929
+ "logps_logqs/chosen": 0.07048022001981735,
1930
+ "logqs/chosen": -0.781145453453064,
1931
+ "logqs/rejected": -0.9654680490493774,
1932
+ "logqs_logps/rejected": 0.025187021121382713,
1933
+ "loss": 0.1646,
1934
+ "step": 1740
1935
+ },
1936
+ {
1937
+ "epoch": 2.80870699167419,
1938
+ "grad_norm": 0.41796875,
1939
+ "learning_rate": 6.149829215924025e-07,
1940
+ "logps_logqs/chosen": 0.06936784833669662,
1941
+ "logqs/chosen": -0.7857106328010559,
1942
+ "logqs/rejected": -1.0071794986724854,
1943
+ "logqs_logps/rejected": 0.02525334618985653,
1944
+ "loss": 0.1685,
1945
+ "step": 1750
1946
+ },
1947
+ {
1948
+ "epoch": 2.824756745912328,
1949
+ "grad_norm": 0.396484375,
1950
+ "learning_rate": 5.163096110912368e-07,
1951
+ "logps_logqs/chosen": 0.0710034891963005,
1952
+ "logqs/chosen": -0.7809524536132812,
1953
+ "logqs/rejected": -0.9730299711227417,
1954
+ "logqs_logps/rejected": 0.02650505304336548,
1955
+ "loss": 0.1685,
1956
+ "step": 1760
1957
+ },
1958
+ {
1959
+ "epoch": 2.8408065001504665,
1960
+ "grad_norm": 0.419921875,
1961
+ "learning_rate": 4.261773653702089e-07,
1962
+ "logps_logqs/chosen": 0.07018028199672699,
1963
+ "logqs/chosen": -0.7636402249336243,
1964
+ "logqs/rejected": -0.9362967610359192,
1965
+ "logqs_logps/rejected": 0.02620730921626091,
1966
+ "loss": 0.1674,
1967
+ "step": 1770
1968
+ },
1969
+ {
1970
+ "epoch": 2.856856254388605,
1971
+ "grad_norm": 0.5859375,
1972
+ "learning_rate": 3.4461762680329803e-07,
1973
+ "logps_logqs/chosen": 0.06559871137142181,
1974
+ "logqs/chosen": -0.7775349617004395,
1975
+ "logqs/rejected": -0.9631088376045227,
1976
+ "logqs_logps/rejected": 0.024396821856498718,
1977
+ "loss": 0.1651,
1978
+ "step": 1780
1979
+ },
1980
+ {
1981
+ "epoch": 2.872906008626743,
1982
+ "grad_norm": 0.419921875,
1983
+ "learning_rate": 2.716588472700815e-07,
1984
+ "logps_logqs/chosen": 0.07039310038089752,
1985
+ "logqs/chosen": -0.7576441168785095,
1986
+ "logqs/rejected": -0.9705005884170532,
1987
+ "logqs_logps/rejected": 0.026445040479302406,
1988
+ "loss": 0.1646,
1989
+ "step": 1790
1990
+ },
1991
+ {
1992
+ "epoch": 2.888955762864881,
1993
+ "grad_norm": 0.39453125,
1994
+ "learning_rate": 2.0732647823038243e-07,
1995
+ "logps_logqs/chosen": 0.06968151032924652,
1996
+ "logqs/chosen": -0.7765523791313171,
1997
+ "logqs/rejected": -0.9748941659927368,
1998
+ "logqs_logps/rejected": 0.02464660070836544,
1999
+ "loss": 0.1665,
2000
+ "step": 1800
2001
+ },
2002
+ {
2003
+ "epoch": 2.905005517103019,
2004
+ "grad_norm": 0.458984375,
2005
+ "learning_rate": 1.5164296184560222e-07,
2006
+ "logps_logqs/chosen": 0.06898736953735352,
2007
+ "logqs/chosen": -0.781166672706604,
2008
+ "logqs/rejected": -0.9676758646965027,
2009
+ "logqs_logps/rejected": 0.02475111000239849,
2010
+ "loss": 0.1665,
2011
+ "step": 1810
2012
+ },
2013
+ {
2014
+ "epoch": 2.9210552713411575,
2015
+ "grad_norm": 0.404296875,
2016
+ "learning_rate": 1.0462772314983882e-07,
2017
+ "logps_logqs/chosen": 0.06931523233652115,
2018
+ "logqs/chosen": -0.7759555578231812,
2019
+ "logqs/rejected": -0.9690700769424438,
2020
+ "logqs_logps/rejected": 0.024959508329629898,
2021
+ "loss": 0.1706,
2022
+ "step": 1820
2023
+ },
2024
+ {
2025
+ "epoch": 2.937105025579296,
2026
+ "grad_norm": 0.4609375,
2027
+ "learning_rate": 6.62971632735182e-08,
2028
+ "logps_logqs/chosen": 0.07144445180892944,
2029
+ "logqs/chosen": -0.7924820780754089,
2030
+ "logqs/rejected": -0.9925470352172852,
2031
+ "logqs_logps/rejected": 0.0241483636200428,
2032
+ "loss": 0.1691,
2033
+ "step": 1830
2034
+ },
2035
+ {
2036
+ "epoch": 2.9531547798174342,
2037
+ "grad_norm": 0.435546875,
2038
+ "learning_rate": 3.666465372190453e-08,
2039
+ "logps_logqs/chosen": 0.07032831013202667,
2040
+ "logqs/chosen": -0.7886664271354675,
2041
+ "logqs/rejected": -1.0002458095550537,
2042
+ "logqs_logps/rejected": 0.024760346859693527,
2043
+ "loss": 0.1681,
2044
+ "step": 1840
2045
+ },
2046
+ {
2047
+ "epoch": 2.969204534055572,
2048
+ "grad_norm": 0.431640625,
2049
+ "learning_rate": 1.57405317104925e-08,
2050
+ "logps_logqs/chosen": 0.068773552775383,
2051
+ "logqs/chosen": -0.7885466814041138,
2052
+ "logqs/rejected": -0.9979216456413269,
2053
+ "logqs_logps/rejected": 0.023049544543027878,
2054
+ "loss": 0.1691,
2055
+ "step": 1850
2056
+ },
2057
+ {
2058
+ "epoch": 2.9852542882937105,
2059
+ "grad_norm": 0.427734375,
2060
+ "learning_rate": 3.532096558903075e-09,
2061
+ "logps_logqs/chosen": 0.06932573765516281,
2062
+ "logqs/chosen": -0.7747775912284851,
2063
+ "logqs/rejected": -0.9474186897277832,
2064
+ "logqs_logps/rejected": 0.026311378926038742,
2065
+ "loss": 0.1678,
2066
+ "step": 1860
2067
+ }
2068
+ ],
2069
+ "logging_steps": 10,
2070
+ "max_steps": 1869,
2071
+ "num_input_tokens_seen": 0,
2072
+ "num_train_epochs": 3,
2073
+ "save_steps": 500,
2074
+ "stateful_callbacks": {
2075
+ "TrainerControl": {
2076
+ "args": {
2077
+ "should_epoch_stop": false,
2078
+ "should_evaluate": false,
2079
+ "should_log": false,
2080
+ "should_save": true,
2081
+ "should_training_stop": true
2082
+ },
2083
+ "attributes": {}
2084
+ }
2085
+ },
2086
+ "total_flos": 0.0,
2087
+ "train_batch_size": 2,
2088
+ "trial_name": null,
2089
+ "trial_params": null
2090
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-1869/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b9fa66895bee5a36a6f750ef897f491a1113f2f79af77c75bdef8187ae10031
3
+ size 6840
qwen2.5-1.5B-it-distillm2/checkpoint-1869/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5-1.5B-it-distillm2/checkpoint-623/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-623/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.45.2",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 152064
29
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-623/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.45.2"
14
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-623/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5-1.5B-it-distillm2/checkpoint-623/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce10fdb4c6ab7af484cb56ecdbf69a28f2b7f20797c260fe958442b97fc57848
3
+ size 3087935634
qwen2.5-1.5B-it-distillm2/checkpoint-623/special_tokens_map.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "bos_token": "<|endoftext|>",
18
+ "eos_token": {
19
+ "content": "<|im_end|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<|endoftext|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ }
32
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-623/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
qwen2.5-1.5B-it-distillm2/checkpoint-623/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": "<|endoftext|>",
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 2048,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-623/trainer_state.json ADDED
@@ -0,0 +1,726 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9998996890360117,
5
+ "eval_steps": 10000000,
6
+ "global_step": 623,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0016049754238138228,
13
+ "grad_norm": 7.15625,
14
+ "learning_rate": 2.6737967914438503e-07,
15
+ "logps_logqs/chosen": 0.057332463562488556,
16
+ "logqs/chosen": -0.752794086933136,
17
+ "logqs/rejected": -0.7512239813804626,
18
+ "logqs_logps/rejected": 0.1236220970749855,
19
+ "loss": 0.4632,
20
+ "step": 1
21
+ },
22
+ {
23
+ "epoch": 0.016049754238138227,
24
+ "grad_norm": 5.71875,
25
+ "learning_rate": 2.6737967914438504e-06,
26
+ "logps_logqs/chosen": 0.047926291823387146,
27
+ "logqs/chosen": -0.7439535856246948,
28
+ "logqs/rejected": -0.7035253643989563,
29
+ "logqs_logps/rejected": 0.13991409540176392,
30
+ "loss": 0.4557,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.032099508476276455,
35
+ "grad_norm": 1.9296875,
36
+ "learning_rate": 5.347593582887701e-06,
37
+ "logps_logqs/chosen": 0.03161158040165901,
38
+ "logqs/chosen": -0.6941366195678711,
39
+ "logqs/rejected": -0.6452130079269409,
40
+ "logqs_logps/rejected": 0.1449585109949112,
41
+ "loss": 0.4015,
42
+ "step": 20
43
+ },
44
+ {
45
+ "epoch": 0.048149262714414685,
46
+ "grad_norm": 1.8515625,
47
+ "learning_rate": 8.02139037433155e-06,
48
+ "logps_logqs/chosen": 0.03457744047045708,
49
+ "logqs/chosen": -0.6867337226867676,
50
+ "logqs/rejected": -0.6923023462295532,
51
+ "logqs_logps/rejected": 0.12560859322547913,
52
+ "loss": 0.3565,
53
+ "step": 30
54
+ },
55
+ {
56
+ "epoch": 0.06419901695255291,
57
+ "grad_norm": 1.234375,
58
+ "learning_rate": 1.0695187165775402e-05,
59
+ "logps_logqs/chosen": 0.05277745798230171,
60
+ "logqs/chosen": -0.7405093908309937,
61
+ "logqs/rejected": -0.7894801497459412,
62
+ "logqs_logps/rejected": 0.0936320573091507,
63
+ "loss": 0.3228,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 0.08024877119069114,
68
+ "grad_norm": 1.0,
69
+ "learning_rate": 1.3368983957219252e-05,
70
+ "logps_logqs/chosen": 0.04658779874444008,
71
+ "logqs/chosen": -0.7268352508544922,
72
+ "logqs/rejected": -0.7926934361457825,
73
+ "logqs_logps/rejected": 0.09889288991689682,
74
+ "loss": 0.3027,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 0.09629852542882937,
79
+ "grad_norm": 0.92578125,
80
+ "learning_rate": 1.60427807486631e-05,
81
+ "logps_logqs/chosen": 0.04193533584475517,
82
+ "logqs/chosen": -0.7151986956596375,
83
+ "logqs/rejected": -0.7687807083129883,
84
+ "logqs_logps/rejected": 0.09696364402770996,
85
+ "loss": 0.2856,
86
+ "step": 60
87
+ },
88
+ {
89
+ "epoch": 0.1123482796669676,
90
+ "grad_norm": 1.0625,
91
+ "learning_rate": 1.8716577540106954e-05,
92
+ "logps_logqs/chosen": 0.04730736091732979,
93
+ "logqs/chosen": -0.7385339736938477,
94
+ "logqs/rejected": -0.8247607946395874,
95
+ "logqs_logps/rejected": 0.08417234569787979,
96
+ "loss": 0.285,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 0.12839803390510582,
101
+ "grad_norm": 0.890625,
102
+ "learning_rate": 2.1390374331550803e-05,
103
+ "logps_logqs/chosen": 0.047996845096349716,
104
+ "logqs/chosen": -0.7431933879852295,
105
+ "logqs/rejected": -0.8130408525466919,
106
+ "logqs_logps/rejected": 0.08468352258205414,
107
+ "loss": 0.2797,
108
+ "step": 80
109
+ },
110
+ {
111
+ "epoch": 0.14444778814324405,
112
+ "grad_norm": 0.79296875,
113
+ "learning_rate": 2.4064171122994652e-05,
114
+ "logps_logqs/chosen": 0.04765843600034714,
115
+ "logqs/chosen": -0.7173298001289368,
116
+ "logqs/rejected": -0.7709535360336304,
117
+ "logqs_logps/rejected": 0.08819471299648285,
118
+ "loss": 0.2705,
119
+ "step": 90
120
+ },
121
+ {
122
+ "epoch": 0.16049754238138228,
123
+ "grad_norm": 0.82421875,
124
+ "learning_rate": 2.6737967914438505e-05,
125
+ "logps_logqs/chosen": 0.040474437177181244,
126
+ "logqs/chosen": -0.710905909538269,
127
+ "logqs/rejected": -0.7959136962890625,
128
+ "logqs_logps/rejected": 0.09612198173999786,
129
+ "loss": 0.2683,
130
+ "step": 100
131
+ },
132
+ {
133
+ "epoch": 0.1765472966195205,
134
+ "grad_norm": 0.7109375,
135
+ "learning_rate": 2.9411764705882354e-05,
136
+ "logps_logqs/chosen": 0.020066609606146812,
137
+ "logqs/chosen": -0.6621483564376831,
138
+ "logqs/rejected": -0.7314690351486206,
139
+ "logqs_logps/rejected": 0.11639375984668732,
140
+ "loss": 0.2682,
141
+ "step": 110
142
+ },
143
+ {
144
+ "epoch": 0.19259705085765874,
145
+ "grad_norm": 0.9921875,
146
+ "learning_rate": 3.20855614973262e-05,
147
+ "logps_logqs/chosen": 0.017146889120340347,
148
+ "logqs/chosen": -0.6599355340003967,
149
+ "logqs/rejected": -0.7124370336532593,
150
+ "logqs_logps/rejected": 0.12015531212091446,
151
+ "loss": 0.268,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.20864680509579697,
156
+ "grad_norm": 1.78125,
157
+ "learning_rate": 3.4759358288770055e-05,
158
+ "logps_logqs/chosen": 0.0369986966252327,
159
+ "logqs/chosen": -0.7184829115867615,
160
+ "logqs/rejected": -0.8139392137527466,
161
+ "logqs_logps/rejected": 0.09705157577991486,
162
+ "loss": 0.2611,
163
+ "step": 130
164
+ },
165
+ {
166
+ "epoch": 0.2246965593339352,
167
+ "grad_norm": 0.87890625,
168
+ "learning_rate": 3.743315508021391e-05,
169
+ "logps_logqs/chosen": 0.07482504099607468,
170
+ "logqs/chosen": -0.7948209643363953,
171
+ "logqs/rejected": -0.9149974584579468,
172
+ "logqs_logps/rejected": 0.04995354264974594,
173
+ "loss": 0.2442,
174
+ "step": 140
175
+ },
176
+ {
177
+ "epoch": 0.24074631357207343,
178
+ "grad_norm": 0.796875,
179
+ "learning_rate": 4.0106951871657754e-05,
180
+ "logps_logqs/chosen": 0.08310296386480331,
181
+ "logqs/chosen": -0.812109649181366,
182
+ "logqs/rejected": -0.9168623089790344,
183
+ "logqs_logps/rejected": 0.041201137006282806,
184
+ "loss": 0.2403,
185
+ "step": 150
186
+ },
187
+ {
188
+ "epoch": 0.25679606781021164,
189
+ "grad_norm": 0.70703125,
190
+ "learning_rate": 4.2780748663101606e-05,
191
+ "logps_logqs/chosen": 0.08498374372720718,
192
+ "logqs/chosen": -0.8101779818534851,
193
+ "logqs/rejected": -0.9302076101303101,
194
+ "logqs_logps/rejected": 0.03671664744615555,
195
+ "loss": 0.2336,
196
+ "step": 160
197
+ },
198
+ {
199
+ "epoch": 0.2728458220483499,
200
+ "grad_norm": 0.85546875,
201
+ "learning_rate": 4.545454545454546e-05,
202
+ "logps_logqs/chosen": 0.08320538699626923,
203
+ "logqs/chosen": -0.8246332406997681,
204
+ "logqs/rejected": -0.9219290018081665,
205
+ "logqs_logps/rejected": 0.03701116889715195,
206
+ "loss": 0.2386,
207
+ "step": 170
208
+ },
209
+ {
210
+ "epoch": 0.2888955762864881,
211
+ "grad_norm": 0.77734375,
212
+ "learning_rate": 4.8128342245989304e-05,
213
+ "logps_logqs/chosen": 0.08179891854524612,
214
+ "logqs/chosen": -0.787845253944397,
215
+ "logqs/rejected": -0.9030144810676575,
216
+ "logqs_logps/rejected": 0.03777966648340225,
217
+ "loss": 0.2315,
218
+ "step": 180
219
+ },
220
+ {
221
+ "epoch": 0.30494533052462636,
222
+ "grad_norm": 0.7734375,
223
+ "learning_rate": 4.9999607536612036e-05,
224
+ "logps_logqs/chosen": 0.08169040083885193,
225
+ "logqs/chosen": -0.8232030868530273,
226
+ "logqs/rejected": -0.9288152456283569,
227
+ "logqs_logps/rejected": 0.039177440106868744,
228
+ "loss": 0.2389,
229
+ "step": 190
230
+ },
231
+ {
232
+ "epoch": 0.32099508476276456,
233
+ "grad_norm": 0.72265625,
234
+ "learning_rate": 4.9992630752499945e-05,
235
+ "logps_logqs/chosen": 0.08127471804618835,
236
+ "logqs/chosen": -0.8097829818725586,
237
+ "logqs/rejected": -0.9110867381095886,
238
+ "logqs_logps/rejected": 0.03667169809341431,
239
+ "loss": 0.2279,
240
+ "step": 200
241
+ },
242
+ {
243
+ "epoch": 0.3370448390009028,
244
+ "grad_norm": 0.71875,
245
+ "learning_rate": 4.997693536122969e-05,
246
+ "logps_logqs/chosen": 0.0788678377866745,
247
+ "logqs/chosen": -0.7994121313095093,
248
+ "logqs/rejected": -0.9303783178329468,
249
+ "logqs_logps/rejected": 0.037957318127155304,
250
+ "loss": 0.2291,
251
+ "step": 210
252
+ },
253
+ {
254
+ "epoch": 0.353094593239041,
255
+ "grad_norm": 0.7109375,
256
+ "learning_rate": 4.995252683809324e-05,
257
+ "logps_logqs/chosen": 0.08248866349458694,
258
+ "logqs/chosen": -0.8128089904785156,
259
+ "logqs/rejected": -0.9362344741821289,
260
+ "logqs_logps/rejected": 0.037949927151203156,
261
+ "loss": 0.229,
262
+ "step": 220
263
+ },
264
+ {
265
+ "epoch": 0.3691443474771793,
266
+ "grad_norm": 0.63671875,
267
+ "learning_rate": 4.9919413697933496e-05,
268
+ "logps_logqs/chosen": 0.0817771628499031,
269
+ "logqs/chosen": -0.7939696311950684,
270
+ "logqs/rejected": -0.9058554768562317,
271
+ "logqs_logps/rejected": 0.04094362258911133,
272
+ "loss": 0.2314,
273
+ "step": 230
274
+ },
275
+ {
276
+ "epoch": 0.3851941017153175,
277
+ "grad_norm": 0.71875,
278
+ "learning_rate": 4.987760749217389e-05,
279
+ "logps_logqs/chosen": 0.076979860663414,
280
+ "logqs/chosen": -0.7874332070350647,
281
+ "logqs/rejected": -0.9303415417671204,
282
+ "logqs_logps/rejected": 0.04022833704948425,
283
+ "loss": 0.2244,
284
+ "step": 240
285
+ },
286
+ {
287
+ "epoch": 0.4012438559534557,
288
+ "grad_norm": 0.83984375,
289
+ "learning_rate": 4.982712280478875e-05,
290
+ "logps_logqs/chosen": 0.07487889379262924,
291
+ "logqs/chosen": -0.7985113859176636,
292
+ "logqs/rejected": -0.9595896005630493,
293
+ "logqs_logps/rejected": 0.04083302244544029,
294
+ "loss": 0.2277,
295
+ "step": 250
296
+ },
297
+ {
298
+ "epoch": 0.41729361019159394,
299
+ "grad_norm": 0.6953125,
300
+ "learning_rate": 4.976797724721567e-05,
301
+ "logps_logqs/chosen": 0.07195514440536499,
302
+ "logqs/chosen": -0.787926971912384,
303
+ "logqs/rejected": -0.897616982460022,
304
+ "logqs_logps/rejected": 0.0396430678665638,
305
+ "loss": 0.2163,
306
+ "step": 260
307
+ },
308
+ {
309
+ "epoch": 0.43334336442973215,
310
+ "grad_norm": 0.7734375,
311
+ "learning_rate": 4.9700191452211806e-05,
312
+ "logps_logqs/chosen": 0.07640247046947479,
313
+ "logqs/chosen": -0.7960728406906128,
314
+ "logqs/rejected": -0.928481936454773,
315
+ "logqs_logps/rejected": 0.03928074985742569,
316
+ "loss": 0.2226,
317
+ "step": 270
318
+ },
319
+ {
320
+ "epoch": 0.4493931186678704,
321
+ "grad_norm": 0.72265625,
322
+ "learning_rate": 4.9623789066656276e-05,
323
+ "logps_logqs/chosen": 0.07622957229614258,
324
+ "logqs/chosen": -0.8077980279922485,
325
+ "logqs/rejected": -0.9034830927848816,
326
+ "logqs_logps/rejected": 0.046734608709812164,
327
+ "loss": 0.2256,
328
+ "step": 280
329
+ },
330
+ {
331
+ "epoch": 0.4654428729060086,
332
+ "grad_norm": 0.78515625,
333
+ "learning_rate": 4.953879674330093e-05,
334
+ "logps_logqs/chosen": 0.07244043052196503,
335
+ "logqs/chosen": -0.7900176644325256,
336
+ "logqs/rejected": -0.891532301902771,
337
+ "logqs_logps/rejected": 0.04387632757425308,
338
+ "loss": 0.217,
339
+ "step": 290
340
+ },
341
+ {
342
+ "epoch": 0.48149262714414687,
343
+ "grad_norm": 0.6796875,
344
+ "learning_rate": 4.944524413147263e-05,
345
+ "logps_logqs/chosen": 0.07289232313632965,
346
+ "logqs/chosen": -0.7856311798095703,
347
+ "logqs/rejected": -0.9099383354187012,
348
+ "logqs_logps/rejected": 0.03974480181932449,
349
+ "loss": 0.2173,
350
+ "step": 300
351
+ },
352
+ {
353
+ "epoch": 0.49754238138228507,
354
+ "grad_norm": 0.66796875,
355
+ "learning_rate": 4.934316386673022e-05,
356
+ "logps_logqs/chosen": 0.07063151150941849,
357
+ "logqs/chosen": -0.7815223932266235,
358
+ "logqs/rejected": -0.9079385995864868,
359
+ "logqs_logps/rejected": 0.041664548218250275,
360
+ "loss": 0.2126,
361
+ "step": 310
362
+ },
363
+ {
364
+ "epoch": 0.5135921356204233,
365
+ "grad_norm": 0.61328125,
366
+ "learning_rate": 4.923259155947964e-05,
367
+ "logps_logqs/chosen": 0.071230448782444,
368
+ "logqs/chosen": -0.8096257448196411,
369
+ "logqs/rejected": -0.9752206802368164,
370
+ "logqs_logps/rejected": 0.03939716890454292,
371
+ "loss": 0.2194,
372
+ "step": 320
373
+ },
374
+ {
375
+ "epoch": 0.5296418898585615,
376
+ "grad_norm": 0.6640625,
377
+ "learning_rate": 4.911356578255139e-05,
378
+ "logps_logqs/chosen": 0.07356056571006775,
379
+ "logqs/chosen": -0.7827819585800171,
380
+ "logqs/rejected": -0.9489104151725769,
381
+ "logqs_logps/rejected": 0.040141116827726364,
382
+ "loss": 0.2145,
383
+ "step": 330
384
+ },
385
+ {
386
+ "epoch": 0.5456916440966998,
387
+ "grad_norm": 0.7109375,
388
+ "learning_rate": 4.898612805774447e-05,
389
+ "logps_logqs/chosen": 0.07881536334753036,
390
+ "logqs/chosen": -0.8095147013664246,
391
+ "logqs/rejected": -0.9313008189201355,
392
+ "logqs_logps/rejected": 0.03702981770038605,
393
+ "loss": 0.2165,
394
+ "step": 340
395
+ },
396
+ {
397
+ "epoch": 0.561741398334838,
398
+ "grad_norm": 0.64453125,
399
+ "learning_rate": 4.885032284134165e-05,
400
+ "logps_logqs/chosen": 0.07632436603307724,
401
+ "logqs/chosen": -0.7979615926742554,
402
+ "logqs/rejected": -0.9368084073066711,
403
+ "logqs_logps/rejected": 0.03654230386018753,
404
+ "loss": 0.2113,
405
+ "step": 350
406
+ },
407
+ {
408
+ "epoch": 0.5777911525729762,
409
+ "grad_norm": 0.62109375,
410
+ "learning_rate": 4.8706197508600984e-05,
411
+ "logps_logqs/chosen": 0.07344283908605576,
412
+ "logqs/chosen": -0.8101444244384766,
413
+ "logqs/rejected": -0.9295485615730286,
414
+ "logqs_logps/rejected": 0.03901313990354538,
415
+ "loss": 0.2151,
416
+ "step": 360
417
+ },
418
+ {
419
+ "epoch": 0.5938409068111145,
420
+ "grad_norm": 0.62890625,
421
+ "learning_rate": 4.855380233722915e-05,
422
+ "logps_logqs/chosen": 0.07421533018350601,
423
+ "logqs/chosen": -0.7826108336448669,
424
+ "logqs/rejected": -0.9229670763015747,
425
+ "logqs_logps/rejected": 0.038314513862133026,
426
+ "loss": 0.2129,
427
+ "step": 370
428
+ },
429
+ {
430
+ "epoch": 0.6098906610492527,
431
+ "grad_norm": 0.62890625,
432
+ "learning_rate": 4.839319048984217e-05,
433
+ "logps_logqs/chosen": 0.07273373752832413,
434
+ "logqs/chosen": -0.7798442840576172,
435
+ "logqs/rejected": -0.9371780157089233,
436
+ "logqs_logps/rejected": 0.03713950887322426,
437
+ "loss": 0.2067,
438
+ "step": 380
439
+ },
440
+ {
441
+ "epoch": 0.6259404152873909,
442
+ "grad_norm": 0.62890625,
443
+ "learning_rate": 4.822441799541979e-05,
444
+ "logps_logqs/chosen": 0.07329441606998444,
445
+ "logqs/chosen": -0.8102380037307739,
446
+ "logqs/rejected": -0.95549476146698,
447
+ "logqs_logps/rejected": 0.036079905927181244,
448
+ "loss": 0.2158,
449
+ "step": 390
450
+ },
451
+ {
452
+ "epoch": 0.6419901695255291,
453
+ "grad_norm": 0.66796875,
454
+ "learning_rate": 4.8047543729759936e-05,
455
+ "logps_logqs/chosen": 0.07404083013534546,
456
+ "logqs/chosen": -0.7847949266433716,
457
+ "logqs/rejected": -0.9250528216362,
458
+ "logqs_logps/rejected": 0.036556728184223175,
459
+ "loss": 0.2105,
460
+ "step": 400
461
+ },
462
+ {
463
+ "epoch": 0.6580399237636674,
464
+ "grad_norm": 0.69140625,
465
+ "learning_rate": 4.786262939494007e-05,
466
+ "logps_logqs/chosen": 0.07539906352758408,
467
+ "logqs/chosen": -0.8072575330734253,
468
+ "logqs/rejected": -0.9661371111869812,
469
+ "logqs_logps/rejected": 0.03662776201963425,
470
+ "loss": 0.2154,
471
+ "step": 410
472
+ },
473
+ {
474
+ "epoch": 0.6740896780018056,
475
+ "grad_norm": 0.66015625,
476
+ "learning_rate": 4.766973949779261e-05,
477
+ "logps_logqs/chosen": 0.0744672566652298,
478
+ "logqs/chosen": -0.787712574005127,
479
+ "logqs/rejected": -0.9350829124450684,
480
+ "logqs_logps/rejected": 0.0353250689804554,
481
+ "loss": 0.2074,
482
+ "step": 420
483
+ },
484
+ {
485
+ "epoch": 0.6901394322399438,
486
+ "grad_norm": 0.62890625,
487
+ "learning_rate": 4.746894132740186e-05,
488
+ "logps_logqs/chosen": 0.07364196330308914,
489
+ "logqs/chosen": -0.7813644409179688,
490
+ "logqs/rejected": -0.9208289980888367,
491
+ "logqs_logps/rejected": 0.0356430858373642,
492
+ "loss": 0.2053,
493
+ "step": 430
494
+ },
495
+ {
496
+ "epoch": 0.706189186478082,
497
+ "grad_norm": 0.56640625,
498
+ "learning_rate": 4.726030493163044e-05,
499
+ "logps_logqs/chosen": 0.07587061077356339,
500
+ "logqs/chosen": -0.7853146195411682,
501
+ "logqs/rejected": -0.8944910168647766,
502
+ "logqs_logps/rejected": 0.03531279042363167,
503
+ "loss": 0.2078,
504
+ "step": 440
505
+ },
506
+ {
507
+ "epoch": 0.7222389407162203,
508
+ "grad_norm": 0.68359375,
509
+ "learning_rate": 4.7043903092683314e-05,
510
+ "logps_logqs/chosen": 0.07814273238182068,
511
+ "logqs/chosen": -0.8133522272109985,
512
+ "logqs/rejected": -0.9508693814277649,
513
+ "logqs_logps/rejected": 0.030778918415308,
514
+ "loss": 0.2104,
515
+ "step": 450
516
+ },
517
+ {
518
+ "epoch": 0.7382886949543586,
519
+ "grad_norm": 0.72265625,
520
+ "learning_rate": 4.6819811301717885e-05,
521
+ "logps_logqs/chosen": 0.07763786613941193,
522
+ "logqs/chosen": -0.8061367869377136,
523
+ "logqs/rejected": -0.9445575475692749,
524
+ "logqs_logps/rejected": 0.035373255610466,
525
+ "loss": 0.2084,
526
+ "step": 460
527
+ },
528
+ {
529
+ "epoch": 0.7543384491924967,
530
+ "grad_norm": 0.59765625,
531
+ "learning_rate": 4.6588107732509134e-05,
532
+ "logps_logqs/chosen": 0.07325359433889389,
533
+ "logqs/chosen": -0.7970572710037231,
534
+ "logqs/rejected": -0.9243482351303101,
535
+ "logqs_logps/rejected": 0.03834759443998337,
536
+ "loss": 0.2088,
537
+ "step": 470
538
+ },
539
+ {
540
+ "epoch": 0.770388203430635,
541
+ "grad_norm": 0.5625,
542
+ "learning_rate": 4.634887321417895e-05,
543
+ "logps_logqs/chosen": 0.0732722282409668,
544
+ "logqs/chosen": -0.7719672918319702,
545
+ "logqs/rejected": -0.9193938970565796,
546
+ "logqs_logps/rejected": 0.03625096380710602,
547
+ "loss": 0.2075,
548
+ "step": 480
549
+ },
550
+ {
551
+ "epoch": 0.7864379576687732,
552
+ "grad_norm": 0.90234375,
553
+ "learning_rate": 4.6102191202999065e-05,
554
+ "logps_logqs/chosen": 0.07290570437908173,
555
+ "logqs/chosen": -0.8092619180679321,
556
+ "logqs/rejected": -0.9642523527145386,
557
+ "logqs_logps/rejected": 0.03311945125460625,
558
+ "loss": 0.2045,
559
+ "step": 490
560
+ },
561
+ {
562
+ "epoch": 0.8024877119069114,
563
+ "grad_norm": 0.6015625,
564
+ "learning_rate": 4.5848147753277656e-05,
565
+ "logps_logqs/chosen": 0.07134760171175003,
566
+ "logqs/chosen": -0.7773372530937195,
567
+ "logqs/rejected": -0.9478441476821899,
568
+ "logqs_logps/rejected": 0.03295496851205826,
569
+ "loss": 0.2049,
570
+ "step": 500
571
+ },
572
+ {
573
+ "epoch": 0.8185374661450496,
574
+ "grad_norm": 0.5859375,
575
+ "learning_rate": 4.5586831487339485e-05,
576
+ "logps_logqs/chosen": 0.07219503819942474,
577
+ "logqs/chosen": -0.7950411438941956,
578
+ "logqs/rejected": -0.9455002546310425,
579
+ "logqs_logps/rejected": 0.03765324503183365,
580
+ "loss": 0.208,
581
+ "step": 510
582
+ },
583
+ {
584
+ "epoch": 0.8345872203831879,
585
+ "grad_norm": 0.62109375,
586
+ "learning_rate": 4.531833356461027e-05,
587
+ "logps_logqs/chosen": 0.06849464029073715,
588
+ "logqs/chosen": -0.7819440364837646,
589
+ "logqs/rejected": -0.9597524404525757,
590
+ "logqs_logps/rejected": 0.04056422412395477,
591
+ "loss": 0.2084,
592
+ "step": 520
593
+ },
594
+ {
595
+ "epoch": 0.8506369746213261,
596
+ "grad_norm": 0.6171875,
597
+ "learning_rate": 4.5042747649816006e-05,
598
+ "logps_logqs/chosen": 0.07160626351833344,
599
+ "logqs/chosen": -0.7721427083015442,
600
+ "logqs/rejected": -0.94866544008255,
601
+ "logqs_logps/rejected": 0.03749927878379822,
602
+ "loss": 0.2068,
603
+ "step": 530
604
+ },
605
+ {
606
+ "epoch": 0.8666867288594643,
607
+ "grad_norm": 0.5546875,
608
+ "learning_rate": 4.476016988030826e-05,
609
+ "logps_logqs/chosen": 0.07214485853910446,
610
+ "logqs/chosen": -0.7874671816825867,
611
+ "logqs/rejected": -0.9504098892211914,
612
+ "logqs_logps/rejected": 0.03507527709007263,
613
+ "loss": 0.2045,
614
+ "step": 540
615
+ },
616
+ {
617
+ "epoch": 0.8827364830976026,
618
+ "grad_norm": 0.54296875,
619
+ "learning_rate": 4.447069883252696e-05,
620
+ "logps_logqs/chosen": 0.07395409047603607,
621
+ "logqs/chosen": -0.771978497505188,
622
+ "logqs/rejected": -0.9129235148429871,
623
+ "logqs_logps/rejected": 0.039593033492565155,
624
+ "loss": 0.205,
625
+ "step": 550
626
+ },
627
+ {
628
+ "epoch": 0.8987862373357408,
629
+ "grad_norm": 0.61328125,
630
+ "learning_rate": 4.417443548761227e-05,
631
+ "logps_logqs/chosen": 0.07299650460481644,
632
+ "logqs/chosen": -0.7979342341423035,
633
+ "logqs/rejected": -0.9357426762580872,
634
+ "logqs_logps/rejected": 0.03937726467847824,
635
+ "loss": 0.2083,
636
+ "step": 560
637
+ },
638
+ {
639
+ "epoch": 0.9148359915738791,
640
+ "grad_norm": 0.57421875,
641
+ "learning_rate": 4.387148319617763e-05,
642
+ "logps_logqs/chosen": 0.06836996972560883,
643
+ "logqs/chosen": -0.7659951448440552,
644
+ "logqs/rejected": -0.8732549548149109,
645
+ "logqs_logps/rejected": 0.039081670343875885,
646
+ "loss": 0.203,
647
+ "step": 570
648
+ },
649
+ {
650
+ "epoch": 0.9308857458120172,
651
+ "grad_norm": 0.57421875,
652
+ "learning_rate": 4.356194764225618e-05,
653
+ "logps_logqs/chosen": 0.07397963851690292,
654
+ "logqs/chosen": -0.7891489863395691,
655
+ "logqs/rejected": -0.9294188618659973,
656
+ "logqs_logps/rejected": 0.03416060656309128,
657
+ "loss": 0.2024,
658
+ "step": 580
659
+ },
660
+ {
661
+ "epoch": 0.9469355000501555,
662
+ "grad_norm": 0.703125,
663
+ "learning_rate": 4.3245936806433205e-05,
664
+ "logps_logqs/chosen": 0.07466734945774078,
665
+ "logqs/chosen": -0.7958794832229614,
666
+ "logqs/rejected": -0.953458309173584,
667
+ "logqs_logps/rejected": 0.035235695540905,
668
+ "loss": 0.206,
669
+ "step": 590
670
+ },
671
+ {
672
+ "epoch": 0.9629852542882937,
673
+ "grad_norm": 0.5859375,
674
+ "learning_rate": 4.292356092817739e-05,
675
+ "logps_logqs/chosen": 0.07136549055576324,
676
+ "logqs/chosen": -0.7849777936935425,
677
+ "logqs/rejected": -0.9327009320259094,
678
+ "logqs_logps/rejected": 0.038332488387823105,
679
+ "loss": 0.2041,
680
+ "step": 600
681
+ },
682
+ {
683
+ "epoch": 0.979035008526432,
684
+ "grad_norm": 0.55859375,
685
+ "learning_rate": 4.259493246738409e-05,
686
+ "logps_logqs/chosen": 0.06976237148046494,
687
+ "logqs/chosen": -0.7658575773239136,
688
+ "logqs/rejected": -0.9027583003044128,
689
+ "logqs_logps/rejected": 0.03464614599943161,
690
+ "loss": 0.1993,
691
+ "step": 610
692
+ },
693
+ {
694
+ "epoch": 0.9950847627645701,
695
+ "grad_norm": 0.55078125,
696
+ "learning_rate": 4.226016606514411e-05,
697
+ "logps_logqs/chosen": 0.0706130638718605,
698
+ "logqs/chosen": -0.7847878932952881,
699
+ "logqs/rejected": -0.9509286880493164,
700
+ "logqs_logps/rejected": 0.03653167933225632,
701
+ "loss": 0.2035,
702
+ "step": 620
703
+ }
704
+ ],
705
+ "logging_steps": 10,
706
+ "max_steps": 1869,
707
+ "num_input_tokens_seen": 0,
708
+ "num_train_epochs": 3,
709
+ "save_steps": 500,
710
+ "stateful_callbacks": {
711
+ "TrainerControl": {
712
+ "args": {
713
+ "should_epoch_stop": false,
714
+ "should_evaluate": false,
715
+ "should_log": false,
716
+ "should_save": true,
717
+ "should_training_stop": false
718
+ },
719
+ "attributes": {}
720
+ }
721
+ },
722
+ "total_flos": 0.0,
723
+ "train_batch_size": 2,
724
+ "trial_name": null,
725
+ "trial_params": null
726
+ }
qwen2.5-1.5B-it-distillm2/checkpoint-623/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b9fa66895bee5a36a6f750ef897f491a1113f2f79af77c75bdef8187ae10031
3
+ size 6840
qwen2.5-1.5B-it-distillm2/checkpoint-623/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5-1.5B-it-distillm2/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.45.2",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 152064
29
+ }
qwen2.5-1.5B-it-distillm2/eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.999699067108035,
3
+ "eval_logps_logqs/chosen": 0.07430665194988251,
4
+ "eval_logqs/chosen": -1.0447684526443481,
5
+ "eval_logqs/rejected": -1.0230671167373657,
6
+ "eval_logqs_logps/rejected": 0.034393880516290665,
7
+ "eval_loss": 0.17647793889045715,
8
+ "eval_runtime": 1.0546,
9
+ "eval_samples": 10,
10
+ "eval_samples_per_second": 9.482,
11
+ "eval_steps_per_second": 2.845
12
+ }
qwen2.5-1.5B-it-distillm2/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.45.2"
14
+ }
qwen2.5-1.5B-it-distillm2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5-1.5B-it-distillm2/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa1f325bd96a7e02353ee13f20a8362121741466a9b362e7cfd122a67c5ae6e0
3
+ size 3087935634
qwen2.5-1.5B-it-distillm2/special_tokens_map.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "bos_token": "<|endoftext|>",
18
+ "eos_token": {
19
+ "content": "<|im_end|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<|endoftext|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ }
32
+ }
qwen2.5-1.5B-it-distillm2/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
qwen2.5-1.5B-it-distillm2/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": "<|endoftext|>",
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 2048,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
qwen2.5-1.5B-it-distillm2/train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.999699067108035,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.194109176269121,
5
+ "train_runtime": 23250.2173,
6
+ "train_samples": 79751,
7
+ "train_samples_per_second": 10.29,
8
+ "train_steps_per_second": 0.08
9
+ }
qwen2.5-1.5B-it-distillm2/trainer_state.json ADDED
@@ -0,0 +1,2099 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.999699067108035,
5
+ "eval_steps": 10000000,
6
+ "global_step": 1869,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0016049754238138228,
13
+ "grad_norm": 7.15625,
14
+ "learning_rate": 2.6737967914438503e-07,
15
+ "logps_logqs/chosen": 0.057332463562488556,
16
+ "logqs/chosen": -0.752794086933136,
17
+ "logqs/rejected": -0.7512239813804626,
18
+ "logqs_logps/rejected": 0.1236220970749855,
19
+ "loss": 0.4632,
20
+ "step": 1
21
+ },
22
+ {
23
+ "epoch": 0.016049754238138227,
24
+ "grad_norm": 5.71875,
25
+ "learning_rate": 2.6737967914438504e-06,
26
+ "logps_logqs/chosen": 0.047926291823387146,
27
+ "logqs/chosen": -0.7439535856246948,
28
+ "logqs/rejected": -0.7035253643989563,
29
+ "logqs_logps/rejected": 0.13991409540176392,
30
+ "loss": 0.4557,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.032099508476276455,
35
+ "grad_norm": 1.9296875,
36
+ "learning_rate": 5.347593582887701e-06,
37
+ "logps_logqs/chosen": 0.03161158040165901,
38
+ "logqs/chosen": -0.6941366195678711,
39
+ "logqs/rejected": -0.6452130079269409,
40
+ "logqs_logps/rejected": 0.1449585109949112,
41
+ "loss": 0.4015,
42
+ "step": 20
43
+ },
44
+ {
45
+ "epoch": 0.048149262714414685,
46
+ "grad_norm": 1.8515625,
47
+ "learning_rate": 8.02139037433155e-06,
48
+ "logps_logqs/chosen": 0.03457744047045708,
49
+ "logqs/chosen": -0.6867337226867676,
50
+ "logqs/rejected": -0.6923023462295532,
51
+ "logqs_logps/rejected": 0.12560859322547913,
52
+ "loss": 0.3565,
53
+ "step": 30
54
+ },
55
+ {
56
+ "epoch": 0.06419901695255291,
57
+ "grad_norm": 1.234375,
58
+ "learning_rate": 1.0695187165775402e-05,
59
+ "logps_logqs/chosen": 0.05277745798230171,
60
+ "logqs/chosen": -0.7405093908309937,
61
+ "logqs/rejected": -0.7894801497459412,
62
+ "logqs_logps/rejected": 0.0936320573091507,
63
+ "loss": 0.3228,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 0.08024877119069114,
68
+ "grad_norm": 1.0,
69
+ "learning_rate": 1.3368983957219252e-05,
70
+ "logps_logqs/chosen": 0.04658779874444008,
71
+ "logqs/chosen": -0.7268352508544922,
72
+ "logqs/rejected": -0.7926934361457825,
73
+ "logqs_logps/rejected": 0.09889288991689682,
74
+ "loss": 0.3027,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 0.09629852542882937,
79
+ "grad_norm": 0.92578125,
80
+ "learning_rate": 1.60427807486631e-05,
81
+ "logps_logqs/chosen": 0.04193533584475517,
82
+ "logqs/chosen": -0.7151986956596375,
83
+ "logqs/rejected": -0.7687807083129883,
84
+ "logqs_logps/rejected": 0.09696364402770996,
85
+ "loss": 0.2856,
86
+ "step": 60
87
+ },
88
+ {
89
+ "epoch": 0.1123482796669676,
90
+ "grad_norm": 1.0625,
91
+ "learning_rate": 1.8716577540106954e-05,
92
+ "logps_logqs/chosen": 0.04730736091732979,
93
+ "logqs/chosen": -0.7385339736938477,
94
+ "logqs/rejected": -0.8247607946395874,
95
+ "logqs_logps/rejected": 0.08417234569787979,
96
+ "loss": 0.285,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 0.12839803390510582,
101
+ "grad_norm": 0.890625,
102
+ "learning_rate": 2.1390374331550803e-05,
103
+ "logps_logqs/chosen": 0.047996845096349716,
104
+ "logqs/chosen": -0.7431933879852295,
105
+ "logqs/rejected": -0.8130408525466919,
106
+ "logqs_logps/rejected": 0.08468352258205414,
107
+ "loss": 0.2797,
108
+ "step": 80
109
+ },
110
+ {
111
+ "epoch": 0.14444778814324405,
112
+ "grad_norm": 0.79296875,
113
+ "learning_rate": 2.4064171122994652e-05,
114
+ "logps_logqs/chosen": 0.04765843600034714,
115
+ "logqs/chosen": -0.7173298001289368,
116
+ "logqs/rejected": -0.7709535360336304,
117
+ "logqs_logps/rejected": 0.08819471299648285,
118
+ "loss": 0.2705,
119
+ "step": 90
120
+ },
121
+ {
122
+ "epoch": 0.16049754238138228,
123
+ "grad_norm": 0.82421875,
124
+ "learning_rate": 2.6737967914438505e-05,
125
+ "logps_logqs/chosen": 0.040474437177181244,
126
+ "logqs/chosen": -0.710905909538269,
127
+ "logqs/rejected": -0.7959136962890625,
128
+ "logqs_logps/rejected": 0.09612198173999786,
129
+ "loss": 0.2683,
130
+ "step": 100
131
+ },
132
+ {
133
+ "epoch": 0.1765472966195205,
134
+ "grad_norm": 0.7109375,
135
+ "learning_rate": 2.9411764705882354e-05,
136
+ "logps_logqs/chosen": 0.020066609606146812,
137
+ "logqs/chosen": -0.6621483564376831,
138
+ "logqs/rejected": -0.7314690351486206,
139
+ "logqs_logps/rejected": 0.11639375984668732,
140
+ "loss": 0.2682,
141
+ "step": 110
142
+ },
143
+ {
144
+ "epoch": 0.19259705085765874,
145
+ "grad_norm": 0.9921875,
146
+ "learning_rate": 3.20855614973262e-05,
147
+ "logps_logqs/chosen": 0.017146889120340347,
148
+ "logqs/chosen": -0.6599355340003967,
149
+ "logqs/rejected": -0.7124370336532593,
150
+ "logqs_logps/rejected": 0.12015531212091446,
151
+ "loss": 0.268,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.20864680509579697,
156
+ "grad_norm": 1.78125,
157
+ "learning_rate": 3.4759358288770055e-05,
158
+ "logps_logqs/chosen": 0.0369986966252327,
159
+ "logqs/chosen": -0.7184829115867615,
160
+ "logqs/rejected": -0.8139392137527466,
161
+ "logqs_logps/rejected": 0.09705157577991486,
162
+ "loss": 0.2611,
163
+ "step": 130
164
+ },
165
+ {
166
+ "epoch": 0.2246965593339352,
167
+ "grad_norm": 0.87890625,
168
+ "learning_rate": 3.743315508021391e-05,
169
+ "logps_logqs/chosen": 0.07482504099607468,
170
+ "logqs/chosen": -0.7948209643363953,
171
+ "logqs/rejected": -0.9149974584579468,
172
+ "logqs_logps/rejected": 0.04995354264974594,
173
+ "loss": 0.2442,
174
+ "step": 140
175
+ },
176
+ {
177
+ "epoch": 0.24074631357207343,
178
+ "grad_norm": 0.796875,
179
+ "learning_rate": 4.0106951871657754e-05,
180
+ "logps_logqs/chosen": 0.08310296386480331,
181
+ "logqs/chosen": -0.812109649181366,
182
+ "logqs/rejected": -0.9168623089790344,
183
+ "logqs_logps/rejected": 0.041201137006282806,
184
+ "loss": 0.2403,
185
+ "step": 150
186
+ },
187
+ {
188
+ "epoch": 0.25679606781021164,
189
+ "grad_norm": 0.70703125,
190
+ "learning_rate": 4.2780748663101606e-05,
191
+ "logps_logqs/chosen": 0.08498374372720718,
192
+ "logqs/chosen": -0.8101779818534851,
193
+ "logqs/rejected": -0.9302076101303101,
194
+ "logqs_logps/rejected": 0.03671664744615555,
195
+ "loss": 0.2336,
196
+ "step": 160
197
+ },
198
+ {
199
+ "epoch": 0.2728458220483499,
200
+ "grad_norm": 0.85546875,
201
+ "learning_rate": 4.545454545454546e-05,
202
+ "logps_logqs/chosen": 0.08320538699626923,
203
+ "logqs/chosen": -0.8246332406997681,
204
+ "logqs/rejected": -0.9219290018081665,
205
+ "logqs_logps/rejected": 0.03701116889715195,
206
+ "loss": 0.2386,
207
+ "step": 170
208
+ },
209
+ {
210
+ "epoch": 0.2888955762864881,
211
+ "grad_norm": 0.77734375,
212
+ "learning_rate": 4.8128342245989304e-05,
213
+ "logps_logqs/chosen": 0.08179891854524612,
214
+ "logqs/chosen": -0.787845253944397,
215
+ "logqs/rejected": -0.9030144810676575,
216
+ "logqs_logps/rejected": 0.03777966648340225,
217
+ "loss": 0.2315,
218
+ "step": 180
219
+ },
220
+ {
221
+ "epoch": 0.30494533052462636,
222
+ "grad_norm": 0.7734375,
223
+ "learning_rate": 4.9999607536612036e-05,
224
+ "logps_logqs/chosen": 0.08169040083885193,
225
+ "logqs/chosen": -0.8232030868530273,
226
+ "logqs/rejected": -0.9288152456283569,
227
+ "logqs_logps/rejected": 0.039177440106868744,
228
+ "loss": 0.2389,
229
+ "step": 190
230
+ },
231
+ {
232
+ "epoch": 0.32099508476276456,
233
+ "grad_norm": 0.72265625,
234
+ "learning_rate": 4.9992630752499945e-05,
235
+ "logps_logqs/chosen": 0.08127471804618835,
236
+ "logqs/chosen": -0.8097829818725586,
237
+ "logqs/rejected": -0.9110867381095886,
238
+ "logqs_logps/rejected": 0.03667169809341431,
239
+ "loss": 0.2279,
240
+ "step": 200
241
+ },
242
+ {
243
+ "epoch": 0.3370448390009028,
244
+ "grad_norm": 0.71875,
245
+ "learning_rate": 4.997693536122969e-05,
246
+ "logps_logqs/chosen": 0.0788678377866745,
247
+ "logqs/chosen": -0.7994121313095093,
248
+ "logqs/rejected": -0.9303783178329468,
249
+ "logqs_logps/rejected": 0.037957318127155304,
250
+ "loss": 0.2291,
251
+ "step": 210
252
+ },
253
+ {
254
+ "epoch": 0.353094593239041,
255
+ "grad_norm": 0.7109375,
256
+ "learning_rate": 4.995252683809324e-05,
257
+ "logps_logqs/chosen": 0.08248866349458694,
258
+ "logqs/chosen": -0.8128089904785156,
259
+ "logqs/rejected": -0.9362344741821289,
260
+ "logqs_logps/rejected": 0.037949927151203156,
261
+ "loss": 0.229,
262
+ "step": 220
263
+ },
264
+ {
265
+ "epoch": 0.3691443474771793,
266
+ "grad_norm": 0.63671875,
267
+ "learning_rate": 4.9919413697933496e-05,
268
+ "logps_logqs/chosen": 0.0817771628499031,
269
+ "logqs/chosen": -0.7939696311950684,
270
+ "logqs/rejected": -0.9058554768562317,
271
+ "logqs_logps/rejected": 0.04094362258911133,
272
+ "loss": 0.2314,
273
+ "step": 230
274
+ },
275
+ {
276
+ "epoch": 0.3851941017153175,
277
+ "grad_norm": 0.71875,
278
+ "learning_rate": 4.987760749217389e-05,
279
+ "logps_logqs/chosen": 0.076979860663414,
280
+ "logqs/chosen": -0.7874332070350647,
281
+ "logqs/rejected": -0.9303415417671204,
282
+ "logqs_logps/rejected": 0.04022833704948425,
283
+ "loss": 0.2244,
284
+ "step": 240
285
+ },
286
+ {
287
+ "epoch": 0.4012438559534557,
288
+ "grad_norm": 0.83984375,
289
+ "learning_rate": 4.982712280478875e-05,
290
+ "logps_logqs/chosen": 0.07487889379262924,
291
+ "logqs/chosen": -0.7985113859176636,
292
+ "logqs/rejected": -0.9595896005630493,
293
+ "logqs_logps/rejected": 0.04083302244544029,
294
+ "loss": 0.2277,
295
+ "step": 250
296
+ },
297
+ {
298
+ "epoch": 0.41729361019159394,
299
+ "grad_norm": 0.6953125,
300
+ "learning_rate": 4.976797724721567e-05,
301
+ "logps_logqs/chosen": 0.07195514440536499,
302
+ "logqs/chosen": -0.787926971912384,
303
+ "logqs/rejected": -0.897616982460022,
304
+ "logqs_logps/rejected": 0.0396430678665638,
305
+ "loss": 0.2163,
306
+ "step": 260
307
+ },
308
+ {
309
+ "epoch": 0.43334336442973215,
310
+ "grad_norm": 0.7734375,
311
+ "learning_rate": 4.9700191452211806e-05,
312
+ "logps_logqs/chosen": 0.07640247046947479,
313
+ "logqs/chosen": -0.7960728406906128,
314
+ "logqs/rejected": -0.928481936454773,
315
+ "logqs_logps/rejected": 0.03928074985742569,
316
+ "loss": 0.2226,
317
+ "step": 270
318
+ },
319
+ {
320
+ "epoch": 0.4493931186678704,
321
+ "grad_norm": 0.72265625,
322
+ "learning_rate": 4.9623789066656276e-05,
323
+ "logps_logqs/chosen": 0.07622957229614258,
324
+ "logqs/chosen": -0.8077980279922485,
325
+ "logqs/rejected": -0.9034830927848816,
326
+ "logqs_logps/rejected": 0.046734608709812164,
327
+ "loss": 0.2256,
328
+ "step": 280
329
+ },
330
+ {
331
+ "epoch": 0.4654428729060086,
332
+ "grad_norm": 0.78515625,
333
+ "learning_rate": 4.953879674330093e-05,
334
+ "logps_logqs/chosen": 0.07244043052196503,
335
+ "logqs/chosen": -0.7900176644325256,
336
+ "logqs/rejected": -0.891532301902771,
337
+ "logqs_logps/rejected": 0.04387632757425308,
338
+ "loss": 0.217,
339
+ "step": 290
340
+ },
341
+ {
342
+ "epoch": 0.48149262714414687,
343
+ "grad_norm": 0.6796875,
344
+ "learning_rate": 4.944524413147263e-05,
345
+ "logps_logqs/chosen": 0.07289232313632965,
346
+ "logqs/chosen": -0.7856311798095703,
347
+ "logqs/rejected": -0.9099383354187012,
348
+ "logqs_logps/rejected": 0.03974480181932449,
349
+ "loss": 0.2173,
350
+ "step": 300
351
+ },
352
+ {
353
+ "epoch": 0.49754238138228507,
354
+ "grad_norm": 0.66796875,
355
+ "learning_rate": 4.934316386673022e-05,
356
+ "logps_logqs/chosen": 0.07063151150941849,
357
+ "logqs/chosen": -0.7815223932266235,
358
+ "logqs/rejected": -0.9079385995864868,
359
+ "logqs_logps/rejected": 0.041664548218250275,
360
+ "loss": 0.2126,
361
+ "step": 310
362
+ },
363
+ {
364
+ "epoch": 0.5135921356204233,
365
+ "grad_norm": 0.61328125,
366
+ "learning_rate": 4.923259155947964e-05,
367
+ "logps_logqs/chosen": 0.071230448782444,
368
+ "logqs/chosen": -0.8096257448196411,
369
+ "logqs/rejected": -0.9752206802368164,
370
+ "logqs_logps/rejected": 0.03939716890454292,
371
+ "loss": 0.2194,
372
+ "step": 320
373
+ },
374
+ {
375
+ "epoch": 0.5296418898585615,
376
+ "grad_norm": 0.6640625,
377
+ "learning_rate": 4.911356578255139e-05,
378
+ "logps_logqs/chosen": 0.07356056571006775,
379
+ "logqs/chosen": -0.7827819585800171,
380
+ "logqs/rejected": -0.9489104151725769,
381
+ "logqs_logps/rejected": 0.040141116827726364,
382
+ "loss": 0.2145,
383
+ "step": 330
384
+ },
385
+ {
386
+ "epoch": 0.5456916440966998,
387
+ "grad_norm": 0.7109375,
388
+ "learning_rate": 4.898612805774447e-05,
389
+ "logps_logqs/chosen": 0.07881536334753036,
390
+ "logqs/chosen": -0.8095147013664246,
391
+ "logqs/rejected": -0.9313008189201355,
392
+ "logqs_logps/rejected": 0.03702981770038605,
393
+ "loss": 0.2165,
394
+ "step": 340
395
+ },
396
+ {
397
+ "epoch": 0.561741398334838,
398
+ "grad_norm": 0.64453125,
399
+ "learning_rate": 4.885032284134165e-05,
400
+ "logps_logqs/chosen": 0.07632436603307724,
401
+ "logqs/chosen": -0.7979615926742554,
402
+ "logqs/rejected": -0.9368084073066711,
403
+ "logqs_logps/rejected": 0.03654230386018753,
404
+ "loss": 0.2113,
405
+ "step": 350
406
+ },
407
+ {
408
+ "epoch": 0.5777911525729762,
409
+ "grad_norm": 0.62109375,
410
+ "learning_rate": 4.8706197508600984e-05,
411
+ "logps_logqs/chosen": 0.07344283908605576,
412
+ "logqs/chosen": -0.8101444244384766,
413
+ "logqs/rejected": -0.9295485615730286,
414
+ "logqs_logps/rejected": 0.03901313990354538,
415
+ "loss": 0.2151,
416
+ "step": 360
417
+ },
418
+ {
419
+ "epoch": 0.5938409068111145,
420
+ "grad_norm": 0.62890625,
421
+ "learning_rate": 4.855380233722915e-05,
422
+ "logps_logqs/chosen": 0.07421533018350601,
423
+ "logqs/chosen": -0.7826108336448669,
424
+ "logqs/rejected": -0.9229670763015747,
425
+ "logqs_logps/rejected": 0.038314513862133026,
426
+ "loss": 0.2129,
427
+ "step": 370
428
+ },
429
+ {
430
+ "epoch": 0.6098906610492527,
431
+ "grad_norm": 0.62890625,
432
+ "learning_rate": 4.839319048984217e-05,
433
+ "logps_logqs/chosen": 0.07273373752832413,
434
+ "logqs/chosen": -0.7798442840576172,
435
+ "logqs/rejected": -0.9371780157089233,
436
+ "logqs_logps/rejected": 0.03713950887322426,
437
+ "loss": 0.2067,
438
+ "step": 380
439
+ },
440
+ {
441
+ "epoch": 0.6259404152873909,
442
+ "grad_norm": 0.62890625,
443
+ "learning_rate": 4.822441799541979e-05,
444
+ "logps_logqs/chosen": 0.07329441606998444,
445
+ "logqs/chosen": -0.8102380037307739,
446
+ "logqs/rejected": -0.95549476146698,
447
+ "logqs_logps/rejected": 0.036079905927181244,
448
+ "loss": 0.2158,
449
+ "step": 390
450
+ },
451
+ {
452
+ "epoch": 0.6419901695255291,
453
+ "grad_norm": 0.66796875,
454
+ "learning_rate": 4.8047543729759936e-05,
455
+ "logps_logqs/chosen": 0.07404083013534546,
456
+ "logqs/chosen": -0.7847949266433716,
457
+ "logqs/rejected": -0.9250528216362,
458
+ "logqs_logps/rejected": 0.036556728184223175,
459
+ "loss": 0.2105,
460
+ "step": 400
461
+ },
462
+ {
463
+ "epoch": 0.6580399237636674,
464
+ "grad_norm": 0.69140625,
465
+ "learning_rate": 4.786262939494007e-05,
466
+ "logps_logqs/chosen": 0.07539906352758408,
467
+ "logqs/chosen": -0.8072575330734253,
468
+ "logqs/rejected": -0.9661371111869812,
469
+ "logqs_logps/rejected": 0.03662776201963425,
470
+ "loss": 0.2154,
471
+ "step": 410
472
+ },
473
+ {
474
+ "epoch": 0.6740896780018056,
475
+ "grad_norm": 0.66015625,
476
+ "learning_rate": 4.766973949779261e-05,
477
+ "logps_logqs/chosen": 0.0744672566652298,
478
+ "logqs/chosen": -0.787712574005127,
479
+ "logqs/rejected": -0.9350829124450684,
480
+ "logqs_logps/rejected": 0.0353250689804554,
481
+ "loss": 0.2074,
482
+ "step": 420
483
+ },
484
+ {
485
+ "epoch": 0.6901394322399438,
486
+ "grad_norm": 0.62890625,
487
+ "learning_rate": 4.746894132740186e-05,
488
+ "logps_logqs/chosen": 0.07364196330308914,
489
+ "logqs/chosen": -0.7813644409179688,
490
+ "logqs/rejected": -0.9208289980888367,
491
+ "logqs_logps/rejected": 0.0356430858373642,
492
+ "loss": 0.2053,
493
+ "step": 430
494
+ },
495
+ {
496
+ "epoch": 0.706189186478082,
497
+ "grad_norm": 0.56640625,
498
+ "learning_rate": 4.726030493163044e-05,
499
+ "logps_logqs/chosen": 0.07587061077356339,
500
+ "logqs/chosen": -0.7853146195411682,
501
+ "logqs/rejected": -0.8944910168647766,
502
+ "logqs_logps/rejected": 0.03531279042363167,
503
+ "loss": 0.2078,
504
+ "step": 440
505
+ },
506
+ {
507
+ "epoch": 0.7222389407162203,
508
+ "grad_norm": 0.68359375,
509
+ "learning_rate": 4.7043903092683314e-05,
510
+ "logps_logqs/chosen": 0.07814273238182068,
511
+ "logqs/chosen": -0.8133522272109985,
512
+ "logqs/rejected": -0.9508693814277649,
513
+ "logqs_logps/rejected": 0.030778918415308,
514
+ "loss": 0.2104,
515
+ "step": 450
516
+ },
517
+ {
518
+ "epoch": 0.7382886949543586,
519
+ "grad_norm": 0.72265625,
520
+ "learning_rate": 4.6819811301717885e-05,
521
+ "logps_logqs/chosen": 0.07763786613941193,
522
+ "logqs/chosen": -0.8061367869377136,
523
+ "logqs/rejected": -0.9445575475692749,
524
+ "logqs_logps/rejected": 0.035373255610466,
525
+ "loss": 0.2084,
526
+ "step": 460
527
+ },
528
+ {
529
+ "epoch": 0.7543384491924967,
530
+ "grad_norm": 0.59765625,
531
+ "learning_rate": 4.6588107732509134e-05,
532
+ "logps_logqs/chosen": 0.07325359433889389,
533
+ "logqs/chosen": -0.7970572710037231,
534
+ "logqs/rejected": -0.9243482351303101,
535
+ "logqs_logps/rejected": 0.03834759443998337,
536
+ "loss": 0.2088,
537
+ "step": 470
538
+ },
539
+ {
540
+ "epoch": 0.770388203430635,
541
+ "grad_norm": 0.5625,
542
+ "learning_rate": 4.634887321417895e-05,
543
+ "logps_logqs/chosen": 0.0732722282409668,
544
+ "logqs/chosen": -0.7719672918319702,
545
+ "logqs/rejected": -0.9193938970565796,
546
+ "logqs_logps/rejected": 0.03625096380710602,
547
+ "loss": 0.2075,
548
+ "step": 480
549
+ },
550
+ {
551
+ "epoch": 0.7864379576687732,
552
+ "grad_norm": 0.90234375,
553
+ "learning_rate": 4.6102191202999065e-05,
554
+ "logps_logqs/chosen": 0.07290570437908173,
555
+ "logqs/chosen": -0.8092619180679321,
556
+ "logqs/rejected": -0.9642523527145386,
557
+ "logqs_logps/rejected": 0.03311945125460625,
558
+ "loss": 0.2045,
559
+ "step": 490
560
+ },
561
+ {
562
+ "epoch": 0.8024877119069114,
563
+ "grad_norm": 0.6015625,
564
+ "learning_rate": 4.5848147753277656e-05,
565
+ "logps_logqs/chosen": 0.07134760171175003,
566
+ "logqs/chosen": -0.7773372530937195,
567
+ "logqs/rejected": -0.9478441476821899,
568
+ "logqs_logps/rejected": 0.03295496851205826,
569
+ "loss": 0.2049,
570
+ "step": 500
571
+ },
572
+ {
573
+ "epoch": 0.8185374661450496,
574
+ "grad_norm": 0.5859375,
575
+ "learning_rate": 4.5586831487339485e-05,
576
+ "logps_logqs/chosen": 0.07219503819942474,
577
+ "logqs/chosen": -0.7950411438941956,
578
+ "logqs/rejected": -0.9455002546310425,
579
+ "logqs_logps/rejected": 0.03765324503183365,
580
+ "loss": 0.208,
581
+ "step": 510
582
+ },
583
+ {
584
+ "epoch": 0.8345872203831879,
585
+ "grad_norm": 0.62109375,
586
+ "learning_rate": 4.531833356461027e-05,
587
+ "logps_logqs/chosen": 0.06849464029073715,
588
+ "logqs/chosen": -0.7819440364837646,
589
+ "logqs/rejected": -0.9597524404525757,
590
+ "logqs_logps/rejected": 0.04056422412395477,
591
+ "loss": 0.2084,
592
+ "step": 520
593
+ },
594
+ {
595
+ "epoch": 0.8506369746213261,
596
+ "grad_norm": 0.6171875,
597
+ "learning_rate": 4.5042747649816006e-05,
598
+ "logps_logqs/chosen": 0.07160626351833344,
599
+ "logqs/chosen": -0.7721427083015442,
600
+ "logqs/rejected": -0.94866544008255,
601
+ "logqs_logps/rejected": 0.03749927878379822,
602
+ "loss": 0.2068,
603
+ "step": 530
604
+ },
605
+ {
606
+ "epoch": 0.8666867288594643,
607
+ "grad_norm": 0.5546875,
608
+ "learning_rate": 4.476016988030826e-05,
609
+ "logps_logqs/chosen": 0.07214485853910446,
610
+ "logqs/chosen": -0.7874671816825867,
611
+ "logqs/rejected": -0.9504098892211914,
612
+ "logqs_logps/rejected": 0.03507527709007263,
613
+ "loss": 0.2045,
614
+ "step": 540
615
+ },
616
+ {
617
+ "epoch": 0.8827364830976026,
618
+ "grad_norm": 0.54296875,
619
+ "learning_rate": 4.447069883252696e-05,
620
+ "logps_logqs/chosen": 0.07395409047603607,
621
+ "logqs/chosen": -0.771978497505188,
622
+ "logqs/rejected": -0.9129235148429871,
623
+ "logqs_logps/rejected": 0.039593033492565155,
624
+ "loss": 0.205,
625
+ "step": 550
626
+ },
627
+ {
628
+ "epoch": 0.8987862373357408,
629
+ "grad_norm": 0.61328125,
630
+ "learning_rate": 4.417443548761227e-05,
631
+ "logps_logqs/chosen": 0.07299650460481644,
632
+ "logqs/chosen": -0.7979342341423035,
633
+ "logqs/rejected": -0.9357426762580872,
634
+ "logqs_logps/rejected": 0.03937726467847824,
635
+ "loss": 0.2083,
636
+ "step": 560
637
+ },
638
+ {
639
+ "epoch": 0.9148359915738791,
640
+ "grad_norm": 0.57421875,
641
+ "learning_rate": 4.387148319617763e-05,
642
+ "logps_logqs/chosen": 0.06836996972560883,
643
+ "logqs/chosen": -0.7659951448440552,
644
+ "logqs/rejected": -0.8732549548149109,
645
+ "logqs_logps/rejected": 0.039081670343875885,
646
+ "loss": 0.203,
647
+ "step": 570
648
+ },
649
+ {
650
+ "epoch": 0.9308857458120172,
651
+ "grad_norm": 0.57421875,
652
+ "learning_rate": 4.356194764225618e-05,
653
+ "logps_logqs/chosen": 0.07397963851690292,
654
+ "logqs/chosen": -0.7891489863395691,
655
+ "logqs/rejected": -0.9294188618659973,
656
+ "logqs_logps/rejected": 0.03416060656309128,
657
+ "loss": 0.2024,
658
+ "step": 580
659
+ },
660
+ {
661
+ "epoch": 0.9469355000501555,
662
+ "grad_norm": 0.703125,
663
+ "learning_rate": 4.3245936806433205e-05,
664
+ "logps_logqs/chosen": 0.07466734945774078,
665
+ "logqs/chosen": -0.7958794832229614,
666
+ "logqs/rejected": -0.953458309173584,
667
+ "logqs_logps/rejected": 0.035235695540905,
668
+ "loss": 0.206,
669
+ "step": 590
670
+ },
671
+ {
672
+ "epoch": 0.9629852542882937,
673
+ "grad_norm": 0.5859375,
674
+ "learning_rate": 4.292356092817739e-05,
675
+ "logps_logqs/chosen": 0.07136549055576324,
676
+ "logqs/chosen": -0.7849777936935425,
677
+ "logqs/rejected": -0.9327009320259094,
678
+ "logqs_logps/rejected": 0.038332488387823105,
679
+ "loss": 0.2041,
680
+ "step": 600
681
+ },
682
+ {
683
+ "epoch": 0.979035008526432,
684
+ "grad_norm": 0.55859375,
685
+ "learning_rate": 4.259493246738409e-05,
686
+ "logps_logqs/chosen": 0.06976237148046494,
687
+ "logqs/chosen": -0.7658575773239136,
688
+ "logqs/rejected": -0.9027583003044128,
689
+ "logqs_logps/rejected": 0.03464614599943161,
690
+ "loss": 0.1993,
691
+ "step": 610
692
+ },
693
+ {
694
+ "epoch": 0.9950847627645701,
695
+ "grad_norm": 0.55078125,
696
+ "learning_rate": 4.226016606514411e-05,
697
+ "logps_logqs/chosen": 0.0706130638718605,
698
+ "logqs/chosen": -0.7847878932952881,
699
+ "logqs/rejected": -0.9509286880493164,
700
+ "logqs_logps/rejected": 0.03653167933225632,
701
+ "loss": 0.2035,
702
+ "step": 620
703
+ },
704
+ {
705
+ "epoch": 1.0111345170027084,
706
+ "grad_norm": 0.53515625,
707
+ "learning_rate": 4.191937850375136e-05,
708
+ "logps_logqs/chosen": 0.06913185119628906,
709
+ "logqs/chosen": -0.7918249368667603,
710
+ "logqs/rejected": -0.9458308219909668,
711
+ "logqs_logps/rejected": 0.030862990766763687,
712
+ "loss": 0.1886,
713
+ "step": 630
714
+ },
715
+ {
716
+ "epoch": 1.0271842712408465,
717
+ "grad_norm": 0.5625,
718
+ "learning_rate": 4.157268866596381e-05,
719
+ "logps_logqs/chosen": 0.0690259039402008,
720
+ "logqs/chosen": -0.7895857095718384,
721
+ "logqs/rejected": -0.9810531735420227,
722
+ "logqs_logps/rejected": 0.03111192025244236,
723
+ "loss": 0.187,
724
+ "step": 640
725
+ },
726
+ {
727
+ "epoch": 1.043234025478985,
728
+ "grad_norm": 0.53125,
729
+ "learning_rate": 4.1220217493531494e-05,
730
+ "logps_logqs/chosen": 0.06900795549154282,
731
+ "logqs/chosen": -0.7902609705924988,
732
+ "logqs/rejected": -0.9747546911239624,
733
+ "logqs_logps/rejected": 0.031710296869277954,
734
+ "loss": 0.1845,
735
+ "step": 650
736
+ },
737
+ {
738
+ "epoch": 1.059283779717123,
739
+ "grad_norm": 0.515625,
740
+ "learning_rate": 4.086208794500637e-05,
741
+ "logps_logqs/chosen": 0.07102300226688385,
742
+ "logqs/chosen": -0.7767165899276733,
743
+ "logqs/rejected": -0.9414197206497192,
744
+ "logqs_logps/rejected": 0.030909577384591103,
745
+ "loss": 0.1807,
746
+ "step": 660
747
+ },
748
+ {
749
+ "epoch": 1.0753335339552612,
750
+ "grad_norm": 0.494140625,
751
+ "learning_rate": 4.049842495284858e-05,
752
+ "logps_logqs/chosen": 0.06985093653202057,
753
+ "logqs/chosen": -0.7880310416221619,
754
+ "logqs/rejected": -0.9546613693237305,
755
+ "logqs_logps/rejected": 0.030261676758527756,
756
+ "loss": 0.186,
757
+ "step": 670
758
+ },
759
+ {
760
+ "epoch": 1.0913832881933996,
761
+ "grad_norm": 0.50390625,
762
+ "learning_rate": 4.012935537984414e-05,
763
+ "logps_logqs/chosen": 0.07058895379304886,
764
+ "logqs/chosen": -0.7935397624969482,
765
+ "logqs/rejected": -1.0117493867874146,
766
+ "logqs_logps/rejected": 0.029221097007393837,
767
+ "loss": 0.1835,
768
+ "step": 680
769
+ },
770
+ {
771
+ "epoch": 1.1074330424315377,
772
+ "grad_norm": 0.6171875,
773
+ "learning_rate": 3.9755007974849135e-05,
774
+ "logps_logqs/chosen": 0.06916572153568268,
775
+ "logqs/chosen": -0.7878638505935669,
776
+ "logqs/rejected": -0.9999720454216003,
777
+ "logqs_logps/rejected": 0.030744653195142746,
778
+ "loss": 0.1857,
779
+ "step": 690
780
+ },
781
+ {
782
+ "epoch": 1.123482796669676,
783
+ "grad_norm": 0.57421875,
784
+ "learning_rate": 3.93755133278762e-05,
785
+ "logps_logqs/chosen": 0.07014169543981552,
786
+ "logqs/chosen": -0.7882756590843201,
787
+ "logqs/rejected": -1.0001386404037476,
788
+ "logqs_logps/rejected": 0.02842717245221138,
789
+ "loss": 0.1832,
790
+ "step": 700
791
+ },
792
+ {
793
+ "epoch": 1.1395325509078142,
794
+ "grad_norm": 0.59375,
795
+ "learning_rate": 3.899100382453845e-05,
796
+ "logps_logqs/chosen": 0.0701603814959526,
797
+ "logqs/chosen": -0.7642520070075989,
798
+ "logqs/rejected": -0.9544415473937988,
799
+ "logqs_logps/rejected": 0.027573522180318832,
800
+ "loss": 0.1806,
801
+ "step": 710
802
+ },
803
+ {
804
+ "epoch": 1.1555823051459524,
805
+ "grad_norm": 0.50390625,
806
+ "learning_rate": 3.8601613599867156e-05,
807
+ "logps_logqs/chosen": 0.06890274584293365,
808
+ "logqs/chosen": -0.7708860635757446,
809
+ "logqs/rejected": -0.9537287950515747,
810
+ "logqs_logps/rejected": 0.028619807213544846,
811
+ "loss": 0.1804,
812
+ "step": 720
813
+ },
814
+ {
815
+ "epoch": 1.1716320593840908,
816
+ "grad_norm": 0.609375,
817
+ "learning_rate": 3.8207478491519216e-05,
818
+ "logps_logqs/chosen": 0.0682586207985878,
819
+ "logqs/chosen": -0.7696245908737183,
820
+ "logqs/rejected": -0.9157883524894714,
821
+ "logqs_logps/rejected": 0.028189942240715027,
822
+ "loss": 0.1814,
823
+ "step": 730
824
+ },
825
+ {
826
+ "epoch": 1.187681813622229,
827
+ "grad_norm": 0.59375,
828
+ "learning_rate": 3.780873599239044e-05,
829
+ "logps_logqs/chosen": 0.07058210670948029,
830
+ "logqs/chosen": -0.801421046257019,
831
+ "logqs/rejected": -0.9591751098632812,
832
+ "logqs_logps/rejected": 0.03063536249101162,
833
+ "loss": 0.1853,
834
+ "step": 740
835
+ },
836
+ {
837
+ "epoch": 1.203731567860367,
838
+ "grad_norm": 0.55859375,
839
+ "learning_rate": 3.740552520265167e-05,
840
+ "logps_logqs/chosen": 0.06886611133813858,
841
+ "logqs/chosen": -0.792157769203186,
842
+ "logqs/rejected": -0.9370824694633484,
843
+ "logqs_logps/rejected": 0.02984955906867981,
844
+ "loss": 0.1833,
845
+ "step": 750
846
+ },
847
+ {
848
+ "epoch": 1.2197813220985054,
849
+ "grad_norm": 0.55859375,
850
+ "learning_rate": 3.699798678122403e-05,
851
+ "logps_logqs/chosen": 0.07179007679224014,
852
+ "logqs/chosen": -0.7622597813606262,
853
+ "logqs/rejected": -0.9382703900337219,
854
+ "logqs_logps/rejected": 0.029157549142837524,
855
+ "loss": 0.1793,
856
+ "step": 760
857
+ },
858
+ {
859
+ "epoch": 1.2358310763366436,
860
+ "grad_norm": 0.546875,
861
+ "learning_rate": 3.6586262896710476e-05,
862
+ "logps_logqs/chosen": 0.06790535151958466,
863
+ "logqs/chosen": -0.7676440477371216,
864
+ "logqs/rejected": -0.928793728351593,
865
+ "logqs_logps/rejected": 0.03030979633331299,
866
+ "loss": 0.1808,
867
+ "step": 770
868
+ },
869
+ {
870
+ "epoch": 1.2518808305747817,
871
+ "grad_norm": 0.51171875,
872
+ "learning_rate": 3.61704971778007e-05,
873
+ "logps_logqs/chosen": 0.06736615300178528,
874
+ "logqs/chosen": -0.770586371421814,
875
+ "logqs/rejected": -0.9197053909301758,
876
+ "logqs_logps/rejected": 0.03245236724615097,
877
+ "loss": 0.1799,
878
+ "step": 780
879
+ },
880
+ {
881
+ "epoch": 1.26793058481292,
882
+ "grad_norm": 0.4765625,
883
+ "learning_rate": 3.575083466316664e-05,
884
+ "logps_logqs/chosen": 0.06956754624843597,
885
+ "logqs/chosen": -0.7774965167045593,
886
+ "logqs/rejected": -0.9702298045158386,
887
+ "logqs_logps/rejected": 0.030748773366212845,
888
+ "loss": 0.1831,
889
+ "step": 790
890
+ },
891
+ {
892
+ "epoch": 1.2839803390510582,
893
+ "grad_norm": 0.5078125,
894
+ "learning_rate": 3.532742175086621e-05,
895
+ "logps_logqs/chosen": 0.06920956075191498,
896
+ "logqs/chosen": -0.7709556818008423,
897
+ "logqs/rejected": -0.9387216567993164,
898
+ "logqs_logps/rejected": 0.027966167777776718,
899
+ "loss": 0.1788,
900
+ "step": 800
901
+ },
902
+ {
903
+ "epoch": 1.3000300932891964,
904
+ "grad_norm": 0.54296875,
905
+ "learning_rate": 3.490040614727272e-05,
906
+ "logps_logqs/chosen": 0.06927359104156494,
907
+ "logqs/chosen": -0.7772814035415649,
908
+ "logqs/rejected": -0.9691821932792664,
909
+ "logqs_logps/rejected": 0.028416061773896217,
910
+ "loss": 0.1781,
911
+ "step": 810
912
+ },
913
+ {
914
+ "epoch": 1.3160798475273348,
915
+ "grad_norm": 0.5078125,
916
+ "learning_rate": 3.446993681554797e-05,
917
+ "logps_logqs/chosen": 0.07202474772930145,
918
+ "logqs/chosen": -0.7855108380317688,
919
+ "logqs/rejected": -0.9708096385002136,
920
+ "logqs_logps/rejected": 0.030036652460694313,
921
+ "loss": 0.1793,
922
+ "step": 820
923
+ },
924
+ {
925
+ "epoch": 1.332129601765473,
926
+ "grad_norm": 0.65234375,
927
+ "learning_rate": 3.403616392367681e-05,
928
+ "logps_logqs/chosen": 0.07054628431797028,
929
+ "logqs/chosen": -0.7540086507797241,
930
+ "logqs/rejected": -0.9042137265205383,
931
+ "logqs_logps/rejected": 0.03296629339456558,
932
+ "loss": 0.1818,
933
+ "step": 830
934
+ },
935
+ {
936
+ "epoch": 1.3481793560036113,
937
+ "grad_norm": 0.5078125,
938
+ "learning_rate": 3.3599238792081575e-05,
939
+ "logps_logqs/chosen": 0.07161605358123779,
940
+ "logqs/chosen": -0.7662056684494019,
941
+ "logqs/rejected": -0.9423080682754517,
942
+ "logqs_logps/rejected": 0.031152984127402306,
943
+ "loss": 0.1786,
944
+ "step": 840
945
+ },
946
+ {
947
+ "epoch": 1.3642291102417494,
948
+ "grad_norm": 0.5390625,
949
+ "learning_rate": 3.315931384083431e-05,
950
+ "logps_logqs/chosen": 0.0672772079706192,
951
+ "logqs/chosen": -0.7698653936386108,
952
+ "logqs/rejected": -0.9608929753303528,
953
+ "logqs_logps/rejected": 0.031727343797683716,
954
+ "loss": 0.181,
955
+ "step": 850
956
+ },
957
+ {
958
+ "epoch": 1.3802788644798878,
959
+ "grad_norm": 0.5390625,
960
+ "learning_rate": 3.2716542536485474e-05,
961
+ "logps_logqs/chosen": 0.07064563035964966,
962
+ "logqs/chosen": -0.7671880125999451,
963
+ "logqs/rejected": -0.9258206486701965,
964
+ "logqs_logps/rejected": 0.03185782581567764,
965
+ "loss": 0.1802,
966
+ "step": 860
967
+ },
968
+ {
969
+ "epoch": 1.396328618718026,
970
+ "grad_norm": 0.59375,
971
+ "learning_rate": 3.2271079338527626e-05,
972
+ "logps_logqs/chosen": 0.07001273334026337,
973
+ "logqs/chosen": -0.7942059636116028,
974
+ "logqs/rejected": -0.9803145527839661,
975
+ "logqs_logps/rejected": 0.02900245226919651,
976
+ "loss": 0.1783,
977
+ "step": 870
978
+ },
979
+ {
980
+ "epoch": 1.412378372956164,
981
+ "grad_norm": 0.48828125,
982
+ "learning_rate": 3.1823079645512655e-05,
983
+ "logps_logqs/chosen": 0.06748739629983902,
984
+ "logqs/chosen": -0.7796913385391235,
985
+ "logqs/rejected": -1.0028189420700073,
986
+ "logqs_logps/rejected": 0.028837621212005615,
987
+ "loss": 0.1791,
988
+ "step": 880
989
+ },
990
+ {
991
+ "epoch": 1.4284281271943025,
992
+ "grad_norm": 0.5234375,
993
+ "learning_rate": 3.137269974084139e-05,
994
+ "logps_logqs/chosen": 0.07031874358654022,
995
+ "logqs/chosen": -0.7788208723068237,
996
+ "logqs/rejected": -0.9545127153396606,
997
+ "logqs_logps/rejected": 0.029628973454236984,
998
+ "loss": 0.179,
999
+ "step": 890
1000
+ },
1001
+ {
1002
+ "epoch": 1.4444778814324406,
1003
+ "grad_norm": 0.486328125,
1004
+ "learning_rate": 3.092009673824469e-05,
1005
+ "logps_logqs/chosen": 0.07582792639732361,
1006
+ "logqs/chosen": -0.8078464269638062,
1007
+ "logqs/rejected": -0.9633975028991699,
1008
+ "logqs_logps/rejected": 0.02939186617732048,
1009
+ "loss": 0.1885,
1010
+ "step": 900
1011
+ },
1012
+ {
1013
+ "epoch": 1.4605276356705787,
1014
+ "grad_norm": 0.53515625,
1015
+ "learning_rate": 3.0465428526974665e-05,
1016
+ "logps_logqs/chosen": 0.07225798070430756,
1017
+ "logqs/chosen": -0.7663235068321228,
1018
+ "logqs/rejected": -0.9817646741867065,
1019
+ "logqs_logps/rejected": 0.028154581785202026,
1020
+ "loss": 0.178,
1021
+ "step": 910
1022
+ },
1023
+ {
1024
+ "epoch": 1.4765773899087171,
1025
+ "grad_norm": 0.49609375,
1026
+ "learning_rate": 3.000885371672554e-05,
1027
+ "logps_logqs/chosen": 0.07049473375082016,
1028
+ "logqs/chosen": -0.7796770930290222,
1029
+ "logqs/rejected": -0.9203440546989441,
1030
+ "logqs_logps/rejected": 0.028858328238129616,
1031
+ "loss": 0.1765,
1032
+ "step": 920
1033
+ },
1034
+ {
1035
+ "epoch": 1.4926271441468553,
1036
+ "grad_norm": 0.53125,
1037
+ "learning_rate": 2.9550531582303082e-05,
1038
+ "logps_logqs/chosen": 0.07375530898571014,
1039
+ "logqs/chosen": -0.7871274352073669,
1040
+ "logqs/rejected": -0.950308620929718,
1041
+ "logqs_logps/rejected": 0.030121903866529465,
1042
+ "loss": 0.1821,
1043
+ "step": 930
1044
+ },
1045
+ {
1046
+ "epoch": 1.5086768983849934,
1047
+ "grad_norm": 0.462890625,
1048
+ "learning_rate": 2.909062200806208e-05,
1049
+ "logps_logqs/chosen": 0.06755580753087997,
1050
+ "logqs/chosen": -0.7797432541847229,
1051
+ "logqs/rejected": -0.9579198956489563,
1052
+ "logqs_logps/rejected": 0.027673590928316116,
1053
+ "loss": 0.1738,
1054
+ "step": 940
1055
+ },
1056
+ {
1057
+ "epoch": 1.5247266526231318,
1058
+ "grad_norm": 0.451171875,
1059
+ "learning_rate": 2.8629285432131083e-05,
1060
+ "logps_logqs/chosen": 0.06972555816173553,
1061
+ "logqs/chosen": -0.7843119502067566,
1062
+ "logqs/rejected": -0.971518874168396,
1063
+ "logqs_logps/rejected": 0.026922276243567467,
1064
+ "loss": 0.1771,
1065
+ "step": 950
1066
+ },
1067
+ {
1068
+ "epoch": 1.54077640686127,
1069
+ "grad_norm": 0.5234375,
1070
+ "learning_rate": 2.8166682790444116e-05,
1071
+ "logps_logqs/chosen": 0.0729864090681076,
1072
+ "logqs/chosen": -0.7914843559265137,
1073
+ "logqs/rejected": -0.9729417562484741,
1074
+ "logqs_logps/rejected": 0.02641429379582405,
1075
+ "loss": 0.1788,
1076
+ "step": 960
1077
+ },
1078
+ {
1079
+ "epoch": 1.556826161099408,
1080
+ "grad_norm": 0.4921875,
1081
+ "learning_rate": 2.7702975460598547e-05,
1082
+ "logps_logqs/chosen": 0.06970744580030441,
1083
+ "logqs/chosen": -0.7867680788040161,
1084
+ "logqs/rejected": -0.9702849388122559,
1085
+ "logqs_logps/rejected": 0.026300692930817604,
1086
+ "loss": 0.1753,
1087
+ "step": 970
1088
+ },
1089
+ {
1090
+ "epoch": 1.5728759153375464,
1091
+ "grad_norm": 0.51171875,
1092
+ "learning_rate": 2.723832520555905e-05,
1093
+ "logps_logqs/chosen": 0.06844428926706314,
1094
+ "logqs/chosen": -0.7796690464019775,
1095
+ "logqs/rejected": -0.9721530079841614,
1096
+ "logqs_logps/rejected": 0.03003394976258278,
1097
+ "loss": 0.1775,
1098
+ "step": 980
1099
+ },
1100
+ {
1101
+ "epoch": 1.5889256695756846,
1102
+ "grad_norm": 0.50390625,
1103
+ "learning_rate": 2.677289411722702e-05,
1104
+ "logps_logqs/chosen": 0.06915868073701859,
1105
+ "logqs/chosen": -0.7836161851882935,
1106
+ "logqs/rejected": -1.0066392421722412,
1107
+ "logqs_logps/rejected": 0.029805105179548264,
1108
+ "loss": 0.1834,
1109
+ "step": 990
1110
+ },
1111
+ {
1112
+ "epoch": 1.6049754238138227,
1113
+ "grad_norm": 0.52734375,
1114
+ "learning_rate": 2.63068445598953e-05,
1115
+ "logps_logqs/chosen": 0.07069100439548492,
1116
+ "logqs/chosen": -0.7810501456260681,
1117
+ "logqs/rejected": -0.9559534788131714,
1118
+ "logqs_logps/rejected": 0.02675846591591835,
1119
+ "loss": 0.1799,
1120
+ "step": 1000
1121
+ },
1122
+ {
1123
+ "epoch": 1.6210251780519611,
1124
+ "grad_norm": 0.455078125,
1125
+ "learning_rate": 2.5840339113607854e-05,
1126
+ "logps_logqs/chosen": 0.06957536935806274,
1127
+ "logqs/chosen": -0.776276171207428,
1128
+ "logqs/rejected": -0.9653003811836243,
1129
+ "logqs_logps/rejected": 0.029132988303899765,
1130
+ "loss": 0.1764,
1131
+ "step": 1010
1132
+ },
1133
+ {
1134
+ "epoch": 1.6370749322900993,
1135
+ "grad_norm": 0.4921875,
1136
+ "learning_rate": 2.53735405174442e-05,
1137
+ "logps_logqs/chosen": 0.06984798610210419,
1138
+ "logqs/chosen": -0.7904434204101562,
1139
+ "logqs/rejected": -0.9736318588256836,
1140
+ "logqs_logps/rejected": 0.02975938282907009,
1141
+ "loss": 0.1778,
1142
+ "step": 1020
1143
+ },
1144
+ {
1145
+ "epoch": 1.6531246865282374,
1146
+ "grad_norm": 0.50390625,
1147
+ "learning_rate": 2.490661161274835e-05,
1148
+ "logps_logqs/chosen": 0.06985798478126526,
1149
+ "logqs/chosen": -0.7864385843276978,
1150
+ "logqs/rejected": -0.9617294073104858,
1151
+ "logqs_logps/rejected": 0.027367640286684036,
1152
+ "loss": 0.1774,
1153
+ "step": 1030
1154
+ },
1155
+ {
1156
+ "epoch": 1.6691744407663758,
1157
+ "grad_norm": 0.4453125,
1158
+ "learning_rate": 2.443971528632205e-05,
1159
+ "logps_logqs/chosen": 0.07047738134860992,
1160
+ "logqs/chosen": -0.7980072498321533,
1161
+ "logqs/rejected": -0.9655786752700806,
1162
+ "logqs_logps/rejected": 0.028297554701566696,
1163
+ "loss": 0.1779,
1164
+ "step": 1040
1165
+ },
1166
+ {
1167
+ "epoch": 1.6852241950045141,
1168
+ "grad_norm": 0.515625,
1169
+ "learning_rate": 2.3973014413602238e-05,
1170
+ "logps_logqs/chosen": 0.07119600474834442,
1171
+ "logqs/chosen": -0.7848029732704163,
1172
+ "logqs/rejected": -1.003758192062378,
1173
+ "logqs_logps/rejected": 0.029630709439516068,
1174
+ "loss": 0.1808,
1175
+ "step": 1050
1176
+ },
1177
+ {
1178
+ "epoch": 1.701273949242652,
1179
+ "grad_norm": 0.498046875,
1180
+ "learning_rate": 2.3506671801842364e-05,
1181
+ "logps_logqs/chosen": 0.06773122400045395,
1182
+ "logqs/chosen": -0.8044145703315735,
1183
+ "logqs/rejected": -0.9743335843086243,
1184
+ "logqs_logps/rejected": 0.027200300246477127,
1185
+ "loss": 0.1769,
1186
+ "step": 1060
1187
+ },
1188
+ {
1189
+ "epoch": 1.7173237034807904,
1190
+ "grad_norm": 0.4921875,
1191
+ "learning_rate": 2.3040850133317597e-05,
1192
+ "logps_logqs/chosen": 0.07197652757167816,
1193
+ "logqs/chosen": -0.7732763886451721,
1194
+ "logqs/rejected": -0.9615533947944641,
1195
+ "logqs_logps/rejected": 0.02922072447836399,
1196
+ "loss": 0.1801,
1197
+ "step": 1070
1198
+ },
1199
+ {
1200
+ "epoch": 1.7333734577189288,
1201
+ "grad_norm": 0.48046875,
1202
+ "learning_rate": 2.2575711908573548e-05,
1203
+ "logps_logqs/chosen": 0.06941990554332733,
1204
+ "logqs/chosen": -0.7937701940536499,
1205
+ "logqs/rejected": -1.0073679685592651,
1206
+ "logqs_logps/rejected": 0.026350444182753563,
1207
+ "loss": 0.1764,
1208
+ "step": 1080
1209
+ },
1210
+ {
1211
+ "epoch": 1.749423211957067,
1212
+ "grad_norm": 0.447265625,
1213
+ "learning_rate": 2.2111419389738435e-05,
1214
+ "logps_logqs/chosen": 0.07039657980203629,
1215
+ "logqs/chosen": -0.7764157056808472,
1216
+ "logqs/rejected": -0.9544513821601868,
1217
+ "logqs_logps/rejected": 0.02772611379623413,
1218
+ "loss": 0.1755,
1219
+ "step": 1090
1220
+ },
1221
+ {
1222
+ "epoch": 1.765472966195205,
1223
+ "grad_norm": 0.5390625,
1224
+ "learning_rate": 2.1648134543918423e-05,
1225
+ "logps_logqs/chosen": 0.07292209565639496,
1226
+ "logqs/chosen": -0.7701107859611511,
1227
+ "logqs/rejected": -0.9618105888366699,
1228
+ "logqs_logps/rejected": 0.027484769001603127,
1229
+ "loss": 0.1762,
1230
+ "step": 1100
1231
+ },
1232
+ {
1233
+ "epoch": 1.7815227204333435,
1234
+ "grad_norm": 0.427734375,
1235
+ "learning_rate": 2.1186018986695842e-05,
1236
+ "logps_logqs/chosen": 0.07149146497249603,
1237
+ "logqs/chosen": -0.7753132581710815,
1238
+ "logqs/rejected": -0.926436722278595,
1239
+ "logqs_logps/rejected": 0.029845744371414185,
1240
+ "loss": 0.1779,
1241
+ "step": 1110
1242
+ },
1243
+ {
1244
+ "epoch": 1.7975724746714816,
1245
+ "grad_norm": 0.45703125,
1246
+ "learning_rate": 2.0725233925750063e-05,
1247
+ "logps_logqs/chosen": 0.06752609461545944,
1248
+ "logqs/chosen": -0.7796769738197327,
1249
+ "logqs/rejected": -0.9575828313827515,
1250
+ "logqs_logps/rejected": 0.03261734917759895,
1251
+ "loss": 0.1771,
1252
+ "step": 1120
1253
+ },
1254
+ {
1255
+ "epoch": 1.8136222289096198,
1256
+ "grad_norm": 0.54296875,
1257
+ "learning_rate": 2.026594010462068e-05,
1258
+ "logps_logqs/chosen": 0.07001613825559616,
1259
+ "logqs/chosen": -0.7918448448181152,
1260
+ "logqs/rejected": -0.9749159812927246,
1261
+ "logqs_logps/rejected": 0.028455784544348717,
1262
+ "loss": 0.1789,
1263
+ "step": 1130
1264
+ },
1265
+ {
1266
+ "epoch": 1.8296719831477581,
1267
+ "grad_norm": 0.478515625,
1268
+ "learning_rate": 1.980829774663256e-05,
1269
+ "logps_logqs/chosen": 0.07289810478687286,
1270
+ "logqs/chosen": -0.7838630676269531,
1271
+ "logqs/rejected": -0.9698160290718079,
1272
+ "logqs_logps/rejected": 0.02906595729291439,
1273
+ "loss": 0.1794,
1274
+ "step": 1140
1275
+ },
1276
+ {
1277
+ "epoch": 1.8457217373858963,
1278
+ "grad_norm": 0.458984375,
1279
+ "learning_rate": 1.9352466499002422e-05,
1280
+ "logps_logqs/chosen": 0.0761161744594574,
1281
+ "logqs/chosen": -0.7819662094116211,
1282
+ "logqs/rejected": -0.9463040232658386,
1283
+ "logqs_logps/rejected": 0.028005197644233704,
1284
+ "loss": 0.183,
1285
+ "step": 1150
1286
+ },
1287
+ {
1288
+ "epoch": 1.8617714916240344,
1289
+ "grad_norm": 0.474609375,
1290
+ "learning_rate": 1.8898605377146383e-05,
1291
+ "logps_logqs/chosen": 0.06957074254751205,
1292
+ "logqs/chosen": -0.7857328057289124,
1293
+ "logqs/rejected": -0.9639459848403931,
1294
+ "logqs_logps/rejected": 0.027649903669953346,
1295
+ "loss": 0.1753,
1296
+ "step": 1160
1297
+ },
1298
+ {
1299
+ "epoch": 1.8778212458621728,
1300
+ "grad_norm": 0.48046875,
1301
+ "learning_rate": 1.8446872709207847e-05,
1302
+ "logps_logqs/chosen": 0.07362545281648636,
1303
+ "logqs/chosen": -0.7835830450057983,
1304
+ "logqs/rejected": -1.0045692920684814,
1305
+ "logqs_logps/rejected": 0.027295967563986778,
1306
+ "loss": 0.1806,
1307
+ "step": 1170
1308
+ },
1309
+ {
1310
+ "epoch": 1.893871000100311,
1311
+ "grad_norm": 0.51171875,
1312
+ "learning_rate": 1.799742608082519e-05,
1313
+ "logps_logqs/chosen": 0.07236044853925705,
1314
+ "logqs/chosen": -0.7885088324546814,
1315
+ "logqs/rejected": -0.9487521052360535,
1316
+ "logqs_logps/rejected": 0.026073191314935684,
1317
+ "loss": 0.1723,
1318
+ "step": 1180
1319
+ },
1320
+ {
1321
+ "epoch": 1.909920754338449,
1322
+ "grad_norm": 0.51171875,
1323
+ "learning_rate": 1.7550422280158513e-05,
1324
+ "logps_logqs/chosen": 0.0707259327173233,
1325
+ "logqs/chosen": -0.7704340815544128,
1326
+ "logqs/rejected": -0.9094502329826355,
1327
+ "logqs_logps/rejected": 0.027560651302337646,
1328
+ "loss": 0.1758,
1329
+ "step": 1190
1330
+ },
1331
+ {
1332
+ "epoch": 1.9259705085765875,
1333
+ "grad_norm": 0.435546875,
1334
+ "learning_rate": 1.7106017243194487e-05,
1335
+ "logps_logqs/chosen": 0.07020822167396545,
1336
+ "logqs/chosen": -0.7692683935165405,
1337
+ "logqs/rejected": -0.9656769037246704,
1338
+ "logqs_logps/rejected": 0.02792223170399666,
1339
+ "loss": 0.177,
1340
+ "step": 1200
1341
+ },
1342
+ {
1343
+ "epoch": 1.9420202628147256,
1344
+ "grad_norm": 0.53515625,
1345
+ "learning_rate": 1.6664365999348594e-05,
1346
+ "logps_logqs/chosen": 0.06943775713443756,
1347
+ "logqs/chosen": -0.7688643932342529,
1348
+ "logqs/rejected": -0.9713398218154907,
1349
+ "logqs_logps/rejected": 0.026859009638428688,
1350
+ "loss": 0.1748,
1351
+ "step": 1210
1352
+ },
1353
+ {
1354
+ "epoch": 1.9580700170528638,
1355
+ "grad_norm": 0.51953125,
1356
+ "learning_rate": 1.6225622617383494e-05,
1357
+ "logps_logqs/chosen": 0.07070201635360718,
1358
+ "logqs/chosen": -0.7839328050613403,
1359
+ "logqs/rejected": -0.9712308645248413,
1360
+ "logqs_logps/rejected": 0.028167420998215675,
1361
+ "loss": 0.1763,
1362
+ "step": 1220
1363
+ },
1364
+ {
1365
+ "epoch": 1.9741197712910021,
1366
+ "grad_norm": 0.4453125,
1367
+ "learning_rate": 1.578994015166263e-05,
1368
+ "logps_logqs/chosen": 0.07289667427539825,
1369
+ "logqs/chosen": -0.7838398814201355,
1370
+ "logqs/rejected": -0.9522368311882019,
1371
+ "logqs_logps/rejected": 0.02768387272953987,
1372
+ "loss": 0.1773,
1373
+ "step": 1230
1374
+ },
1375
+ {
1376
+ "epoch": 1.9901695255291403,
1377
+ "grad_norm": 0.53515625,
1378
+ "learning_rate": 1.535747058875765e-05,
1379
+ "logps_logqs/chosen": 0.06881529092788696,
1380
+ "logqs/chosen": -0.790834903717041,
1381
+ "logqs/rejected": -0.9497320055961609,
1382
+ "logqs_logps/rejected": 0.029328888282179832,
1383
+ "loss": 0.1734,
1384
+ "step": 1240
1385
+ },
1386
+ {
1387
+ "epoch": 2.0062192797672784,
1388
+ "grad_norm": 0.43359375,
1389
+ "learning_rate": 1.4928364794428307e-05,
1390
+ "logps_logqs/chosen": 0.07006263732910156,
1391
+ "logqs/chosen": -0.799282431602478,
1392
+ "logqs/rejected": -1.0095001459121704,
1393
+ "logqs_logps/rejected": 0.02767905592918396,
1394
+ "loss": 0.1732,
1395
+ "step": 1250
1396
+ },
1397
+ {
1398
+ "epoch": 2.022269034005417,
1399
+ "grad_norm": 0.4296875,
1400
+ "learning_rate": 1.4502772460993385e-05,
1401
+ "logps_logqs/chosen": 0.06896007061004639,
1402
+ "logqs/chosen": -0.7676675319671631,
1403
+ "logqs/rejected": -0.9425755739212036,
1404
+ "logqs_logps/rejected": 0.02848168835043907,
1405
+ "loss": 0.1685,
1406
+ "step": 1260
1407
+ },
1408
+ {
1409
+ "epoch": 2.038318788243555,
1410
+ "grad_norm": 0.462890625,
1411
+ "learning_rate": 1.4080842055110993e-05,
1412
+ "logps_logqs/chosen": 0.06711134314537048,
1413
+ "logqs/chosen": -0.7576900720596313,
1414
+ "logqs/rejected": -0.9405368566513062,
1415
+ "logqs_logps/rejected": 0.02635120414197445,
1416
+ "loss": 0.1689,
1417
+ "step": 1270
1418
+ },
1419
+ {
1420
+ "epoch": 2.054368542481693,
1421
+ "grad_norm": 0.46484375,
1422
+ "learning_rate": 1.3662720765986341e-05,
1423
+ "logps_logqs/chosen": 0.06902097165584564,
1424
+ "logqs/chosen": -0.788707435131073,
1425
+ "logqs/rejected": -0.9549592733383179,
1426
+ "logqs_logps/rejected": 0.028454547747969627,
1427
+ "loss": 0.1686,
1428
+ "step": 1280
1429
+ },
1430
+ {
1431
+ "epoch": 2.0704182967198315,
1432
+ "grad_norm": 0.40625,
1433
+ "learning_rate": 1.3248554454025275e-05,
1434
+ "logps_logqs/chosen": 0.07042928040027618,
1435
+ "logqs/chosen": -0.7786656618118286,
1436
+ "logqs/rejected": -0.9685086011886597,
1437
+ "logqs_logps/rejected": 0.025051862001419067,
1438
+ "loss": 0.1673,
1439
+ "step": 1290
1440
+ },
1441
+ {
1442
+ "epoch": 2.08646805095797,
1443
+ "grad_norm": 0.447265625,
1444
+ "learning_rate": 1.2838487599951243e-05,
1445
+ "logps_logqs/chosen": 0.06794705986976624,
1446
+ "logqs/chosen": -0.7671617269515991,
1447
+ "logqs/rejected": -0.9481566548347473,
1448
+ "logqs_logps/rejected": 0.025887325406074524,
1449
+ "loss": 0.1696,
1450
+ "step": 1300
1451
+ },
1452
+ {
1453
+ "epoch": 2.1025178051961078,
1454
+ "grad_norm": 0.439453125,
1455
+ "learning_rate": 1.2432663254403638e-05,
1456
+ "logps_logqs/chosen": 0.06914719194173813,
1457
+ "logqs/chosen": -0.8097491264343262,
1458
+ "logqs/rejected": -0.9609512090682983,
1459
+ "logqs_logps/rejected": 0.025332655757665634,
1460
+ "loss": 0.1689,
1461
+ "step": 1310
1462
+ },
1463
+ {
1464
+ "epoch": 2.118567559434246,
1465
+ "grad_norm": 0.421875,
1466
+ "learning_rate": 1.2031222988034967e-05,
1467
+ "logps_logqs/chosen": 0.06850691139698029,
1468
+ "logqs/chosen": -0.7866081595420837,
1469
+ "logqs/rejected": -0.952921986579895,
1470
+ "logqs_logps/rejected": 0.02708747610449791,
1471
+ "loss": 0.1672,
1472
+ "step": 1320
1473
+ },
1474
+ {
1475
+ "epoch": 2.1346173136723845,
1476
+ "grad_norm": 0.45703125,
1477
+ "learning_rate": 1.1634306842124423e-05,
1478
+ "logps_logqs/chosen": 0.06721793115139008,
1479
+ "logqs/chosen": -0.7912784218788147,
1480
+ "logqs/rejected": -0.9742089509963989,
1481
+ "logqs_logps/rejected": 0.024986112490296364,
1482
+ "loss": 0.1668,
1483
+ "step": 1330
1484
+ },
1485
+ {
1486
+ "epoch": 2.1506670679105224,
1487
+ "grad_norm": 0.451171875,
1488
+ "learning_rate": 1.1242053279724762e-05,
1489
+ "logps_logqs/chosen": 0.06856787204742432,
1490
+ "logqs/chosen": -0.7681561708450317,
1491
+ "logqs/rejected": -0.9738146066665649,
1492
+ "logqs_logps/rejected": 0.025526920333504677,
1493
+ "loss": 0.1676,
1494
+ "step": 1340
1495
+ },
1496
+ {
1497
+ "epoch": 2.166716822148661,
1498
+ "grad_norm": 0.44140625,
1499
+ "learning_rate": 1.0854599137359953e-05,
1500
+ "logps_logqs/chosen": 0.07144749909639359,
1501
+ "logqs/chosen": -0.7849553823471069,
1502
+ "logqs/rejected": -0.9945628046989441,
1503
+ "logqs_logps/rejected": 0.02589735947549343,
1504
+ "loss": 0.1683,
1505
+ "step": 1350
1506
+ },
1507
+ {
1508
+ "epoch": 2.182766576386799,
1509
+ "grad_norm": 0.3984375,
1510
+ "learning_rate": 1.0472079577290111e-05,
1511
+ "logps_logqs/chosen": 0.06926636397838593,
1512
+ "logqs/chosen": -0.780665934085846,
1513
+ "logqs/rejected": -0.9741228222846985,
1514
+ "logqs_logps/rejected": 0.0255129374563694,
1515
+ "loss": 0.1668,
1516
+ "step": 1360
1517
+ },
1518
+ {
1519
+ "epoch": 2.198816330624937,
1520
+ "grad_norm": 0.431640625,
1521
+ "learning_rate": 1.009462804036059e-05,
1522
+ "logps_logqs/chosen": 0.06916262209415436,
1523
+ "logqs/chosen": -0.7870660424232483,
1524
+ "logqs/rejected": -0.9848629236221313,
1525
+ "logqs_logps/rejected": 0.024935439229011536,
1526
+ "loss": 0.1688,
1527
+ "step": 1370
1528
+ },
1529
+ {
1530
+ "epoch": 2.2148660848630755,
1531
+ "grad_norm": 0.48046875,
1532
+ "learning_rate": 9.722376199451436e-06,
1533
+ "logps_logqs/chosen": 0.06875228136777878,
1534
+ "logqs/chosen": -0.7785555124282837,
1535
+ "logqs/rejected": -0.9765084981918335,
1536
+ "logqs_logps/rejected": 0.02562164142727852,
1537
+ "loss": 0.1677,
1538
+ "step": 1380
1539
+ },
1540
+ {
1541
+ "epoch": 2.230915839101214,
1542
+ "grad_norm": 0.40234375,
1543
+ "learning_rate": 9.35545391354378e-06,
1544
+ "logps_logqs/chosen": 0.06958961486816406,
1545
+ "logqs/chosen": -0.7699087262153625,
1546
+ "logqs/rejected": -0.9525764584541321,
1547
+ "logqs_logps/rejected": 0.027162248268723488,
1548
+ "loss": 0.1672,
1549
+ "step": 1390
1550
+ },
1551
+ {
1552
+ "epoch": 2.246965593339352,
1553
+ "grad_norm": 0.416015625,
1554
+ "learning_rate": 8.993989182418824e-06,
1555
+ "logps_logqs/chosen": 0.0719502717256546,
1556
+ "logqs/chosen": -0.7669367790222168,
1557
+ "logqs/rejected": -0.9486163258552551,
1558
+ "logqs_logps/rejected": 0.02480030246078968,
1559
+ "loss": 0.1683,
1560
+ "step": 1400
1561
+ },
1562
+ {
1563
+ "epoch": 2.26301534757749,
1564
+ "grad_norm": 0.48046875,
1565
+ "learning_rate": 8.63810810200556e-06,
1566
+ "logps_logqs/chosen": 0.06904618442058563,
1567
+ "logqs/chosen": -0.7622929811477661,
1568
+ "logqs/rejected": -0.9743655920028687,
1569
+ "logqs_logps/rejected": 0.026619747281074524,
1570
+ "loss": 0.1678,
1571
+ "step": 1410
1572
+ },
1573
+ {
1574
+ "epoch": 2.2790651018156285,
1575
+ "grad_norm": 0.40234375,
1576
+ "learning_rate": 8.287934820392498e-06,
1577
+ "logps_logqs/chosen": 0.06923404335975647,
1578
+ "logqs/chosen": -0.7849544286727905,
1579
+ "logqs/rejected": -0.9630452990531921,
1580
+ "logqs_logps/rejected": 0.023480530828237534,
1581
+ "loss": 0.1651,
1582
+ "step": 1420
1583
+ },
1584
+ {
1585
+ "epoch": 2.295114856053767,
1586
+ "grad_norm": 0.4140625,
1587
+ "learning_rate": 7.943591494519016e-06,
1588
+ "logps_logqs/chosen": 0.06750839948654175,
1589
+ "logqs/chosen": -0.764995276927948,
1590
+ "logqs/rejected": -0.9358587265014648,
1591
+ "logqs_logps/rejected": 0.027208849787712097,
1592
+ "loss": 0.1667,
1593
+ "step": 1430
1594
+ },
1595
+ {
1596
+ "epoch": 2.311164610291905,
1597
+ "grad_norm": 0.40625,
1598
+ "learning_rate": 7.60519824756124e-06,
1599
+ "logps_logqs/chosen": 0.06875176727771759,
1600
+ "logqs/chosen": -0.8004514575004578,
1601
+ "logqs/rejected": -1.0356009006500244,
1602
+ "logqs_logps/rejected": 0.0247772429138422,
1603
+ "loss": 0.1681,
1604
+ "step": 1440
1605
+ },
1606
+ {
1607
+ "epoch": 2.327214364530043,
1608
+ "grad_norm": 0.41015625,
1609
+ "learning_rate": 7.272873127027449e-06,
1610
+ "logps_logqs/chosen": 0.06980612874031067,
1611
+ "logqs/chosen": -0.7612735033035278,
1612
+ "logqs/rejected": -0.9591197967529297,
1613
+ "logqs_logps/rejected": 0.0231007132679224,
1614
+ "loss": 0.1652,
1615
+ "step": 1450
1616
+ },
1617
+ {
1618
+ "epoch": 2.3432641187681815,
1619
+ "grad_norm": 0.41796875,
1620
+ "learning_rate": 6.946732063577488e-06,
1621
+ "logps_logqs/chosen": 0.0690704956650734,
1622
+ "logqs/chosen": -0.7698851823806763,
1623
+ "logqs/rejected": -0.949694037437439,
1624
+ "logqs_logps/rejected": 0.023757826536893845,
1625
+ "loss": 0.1655,
1626
+ "step": 1460
1627
+ },
1628
+ {
1629
+ "epoch": 2.3593138730063195,
1630
+ "grad_norm": 0.4375,
1631
+ "learning_rate": 6.6268888305807296e-06,
1632
+ "logps_logqs/chosen": 0.06597896665334702,
1633
+ "logqs/chosen": -0.7630634903907776,
1634
+ "logqs/rejected": -0.9974457025527954,
1635
+ "logqs_logps/rejected": 0.02252907119691372,
1636
+ "loss": 0.1648,
1637
+ "step": 1470
1638
+ },
1639
+ {
1640
+ "epoch": 2.375363627244458,
1641
+ "grad_norm": 0.404296875,
1642
+ "learning_rate": 6.313455004426577e-06,
1643
+ "logps_logqs/chosen": 0.06898193061351776,
1644
+ "logqs/chosen": -0.77290940284729,
1645
+ "logqs/rejected": -0.9675156474113464,
1646
+ "logqs_logps/rejected": 0.024294773116707802,
1647
+ "loss": 0.166,
1648
+ "step": 1480
1649
+ },
1650
+ {
1651
+ "epoch": 2.391413381482596,
1652
+ "grad_norm": 0.443359375,
1653
+ "learning_rate": 6.0065399256013204e-06,
1654
+ "logps_logqs/chosen": 0.0698733851313591,
1655
+ "logqs/chosen": -0.761997401714325,
1656
+ "logqs/rejected": -0.9315551519393921,
1657
+ "logqs_logps/rejected": 0.02528567612171173,
1658
+ "loss": 0.1696,
1659
+ "step": 1490
1660
+ },
1661
+ {
1662
+ "epoch": 2.407463135720734,
1663
+ "grad_norm": 0.423828125,
1664
+ "learning_rate": 5.7062506605450454e-06,
1665
+ "logps_logqs/chosen": 0.06849467009305954,
1666
+ "logqs/chosen": -0.7781127095222473,
1667
+ "logqs/rejected": -0.9850066304206848,
1668
+ "logqs_logps/rejected": 0.026143735274672508,
1669
+ "loss": 0.1703,
1670
+ "step": 1500
1671
+ },
1672
+ {
1673
+ "epoch": 2.4235128899588725,
1674
+ "grad_norm": 0.443359375,
1675
+ "learning_rate": 5.412691964301827e-06,
1676
+ "logps_logqs/chosen": 0.06971971690654755,
1677
+ "logqs/chosen": -0.7925983667373657,
1678
+ "logqs/rejected": -0.9926835894584656,
1679
+ "logqs_logps/rejected": 0.025326719507575035,
1680
+ "loss": 0.1685,
1681
+ "step": 1510
1682
+ },
1683
+ {
1684
+ "epoch": 2.439562644197011,
1685
+ "grad_norm": 0.470703125,
1686
+ "learning_rate": 5.125966243976218e-06,
1687
+ "logps_logqs/chosen": 0.07100898772478104,
1688
+ "logqs/chosen": -0.7900832891464233,
1689
+ "logqs/rejected": -0.9629155993461609,
1690
+ "logqs_logps/rejected": 0.025052938610315323,
1691
+ "loss": 0.1693,
1692
+ "step": 1520
1693
+ },
1694
+ {
1695
+ "epoch": 2.455612398435149,
1696
+ "grad_norm": 0.453125,
1697
+ "learning_rate": 4.846173523008824e-06,
1698
+ "logps_logqs/chosen": 0.06859283149242401,
1699
+ "logqs/chosen": -0.7894052267074585,
1700
+ "logqs/rejected": -0.9897419810295105,
1701
+ "logqs_logps/rejected": 0.02611861191689968,
1702
+ "loss": 0.1683,
1703
+ "step": 1530
1704
+ },
1705
+ {
1706
+ "epoch": 2.471662152673287,
1707
+ "grad_norm": 0.439453125,
1708
+ "learning_rate": 4.573411406283409e-06,
1709
+ "logps_logqs/chosen": 0.07065166532993317,
1710
+ "logqs/chosen": -0.8073331117630005,
1711
+ "logqs/rejected": -0.9767266511917114,
1712
+ "logqs_logps/rejected": 0.0274626724421978,
1713
+ "loss": 0.1756,
1714
+ "step": 1540
1715
+ },
1716
+ {
1717
+ "epoch": 2.4877119069114255,
1718
+ "grad_norm": 0.44140625,
1719
+ "learning_rate": 4.307775046077739e-06,
1720
+ "logps_logqs/chosen": 0.0715617686510086,
1721
+ "logqs/chosen": -0.8018879890441895,
1722
+ "logqs/rejected": -0.9990041851997375,
1723
+ "logqs_logps/rejected": 0.026406193152070045,
1724
+ "loss": 0.1719,
1725
+ "step": 1550
1726
+ },
1727
+ {
1728
+ "epoch": 2.5037616611495634,
1729
+ "grad_norm": 0.46875,
1730
+ "learning_rate": 4.049357108869964e-06,
1731
+ "logps_logqs/chosen": 0.06756819784641266,
1732
+ "logqs/chosen": -0.7610915899276733,
1733
+ "logqs/rejected": -0.972091794013977,
1734
+ "logqs_logps/rejected": 0.026423901319503784,
1735
+ "loss": 0.1683,
1736
+ "step": 1560
1737
+ },
1738
+ {
1739
+ "epoch": 2.519811415387702,
1740
+ "grad_norm": 0.439453125,
1741
+ "learning_rate": 3.798247743012201e-06,
1742
+ "logps_logqs/chosen": 0.06909768283367157,
1743
+ "logqs/chosen": -0.7960779666900635,
1744
+ "logqs/rejected": -0.9779514074325562,
1745
+ "logqs_logps/rejected": 0.0239783376455307,
1746
+ "loss": 0.1662,
1747
+ "step": 1570
1748
+ },
1749
+ {
1750
+ "epoch": 2.53586116962584,
1751
+ "grad_norm": 0.43359375,
1752
+ "learning_rate": 3.554534547282512e-06,
1753
+ "logps_logqs/chosen": 0.07034210860729218,
1754
+ "logqs/chosen": -0.7767470479011536,
1755
+ "logqs/rejected": -0.9516876339912415,
1756
+ "logqs_logps/rejected": 0.028661763295531273,
1757
+ "loss": 0.17,
1758
+ "step": 1580
1759
+ },
1760
+ {
1761
+ "epoch": 2.5519109238639786,
1762
+ "grad_norm": 0.404296875,
1763
+ "learning_rate": 3.318302540326343e-06,
1764
+ "logps_logqs/chosen": 0.07079549133777618,
1765
+ "logqs/chosen": -0.777116596698761,
1766
+ "logqs/rejected": -0.9669076204299927,
1767
+ "logqs_logps/rejected": 0.026164010167121887,
1768
+ "loss": 0.1723,
1769
+ "step": 1590
1770
+ },
1771
+ {
1772
+ "epoch": 2.5679606781021165,
1773
+ "grad_norm": 0.431640625,
1774
+ "learning_rate": 3.089634130998026e-06,
1775
+ "logps_logqs/chosen": 0.06778942793607712,
1776
+ "logqs/chosen": -0.7698632478713989,
1777
+ "logqs/rejected": -0.9828590154647827,
1778
+ "logqs_logps/rejected": 0.02352612093091011,
1779
+ "loss": 0.166,
1780
+ "step": 1600
1781
+ },
1782
+ {
1783
+ "epoch": 2.584010432340255,
1784
+ "grad_norm": 0.4296875,
1785
+ "learning_rate": 2.8686090896126587e-06,
1786
+ "logps_logqs/chosen": 0.06593993306159973,
1787
+ "logqs/chosen": -0.7838481664657593,
1788
+ "logqs/rejected": -0.9900785684585571,
1789
+ "logqs_logps/rejected": 0.023241404443979263,
1790
+ "loss": 0.1633,
1791
+ "step": 1610
1792
+ },
1793
+ {
1794
+ "epoch": 2.6000601865783928,
1795
+ "grad_norm": 0.439453125,
1796
+ "learning_rate": 2.655304520118482e-06,
1797
+ "logps_logqs/chosen": 0.0693887323141098,
1798
+ "logqs/chosen": -0.778706431388855,
1799
+ "logqs/rejected": -0.9755508303642273,
1800
+ "logqs_logps/rejected": 0.02637295424938202,
1801
+ "loss": 0.1664,
1802
+ "step": 1620
1803
+ },
1804
+ {
1805
+ "epoch": 2.616109940816531,
1806
+ "grad_norm": 0.44140625,
1807
+ "learning_rate": 2.44979483319939e-06,
1808
+ "logps_logqs/chosen": 0.06880888342857361,
1809
+ "logqs/chosen": -0.7670097351074219,
1810
+ "logqs/rejected": -1.0248219966888428,
1811
+ "logqs_logps/rejected": 0.025341719388961792,
1812
+ "loss": 0.1667,
1813
+ "step": 1630
1814
+ },
1815
+ {
1816
+ "epoch": 2.6321596950546695,
1817
+ "grad_norm": 0.44140625,
1818
+ "learning_rate": 2.252151720316964e-06,
1819
+ "logps_logqs/chosen": 0.0697592943906784,
1820
+ "logqs/chosen": -0.7662806510925293,
1821
+ "logqs/rejected": -0.9526630640029907,
1822
+ "logqs_logps/rejected": 0.024268481880426407,
1823
+ "loss": 0.1672,
1824
+ "step": 1640
1825
+ },
1826
+ {
1827
+ "epoch": 2.648209449292808,
1828
+ "grad_norm": 0.458984375,
1829
+ "learning_rate": 2.0624441287011213e-06,
1830
+ "logps_logqs/chosen": 0.06959348171949387,
1831
+ "logqs/chosen": -0.7930929064750671,
1832
+ "logqs/rejected": -0.9900212287902832,
1833
+ "logqs_logps/rejected": 0.02552485466003418,
1834
+ "loss": 0.1705,
1835
+ "step": 1650
1836
+ },
1837
+ {
1838
+ "epoch": 2.664259203530946,
1839
+ "grad_norm": 0.419921875,
1840
+ "learning_rate": 1.8807382372980687e-06,
1841
+ "logps_logqs/chosen": 0.06878109276294708,
1842
+ "logqs/chosen": -0.776373565196991,
1843
+ "logqs/rejected": -0.9679840207099915,
1844
+ "logqs_logps/rejected": 0.024887990206480026,
1845
+ "loss": 0.1656,
1846
+ "step": 1660
1847
+ },
1848
+ {
1849
+ "epoch": 2.680308957769084,
1850
+ "grad_norm": 0.431640625,
1851
+ "learning_rate": 1.7070974336839796e-06,
1852
+ "logps_logqs/chosen": 0.06730766594409943,
1853
+ "logqs/chosen": -0.7652665972709656,
1854
+ "logqs/rejected": -0.9662426114082336,
1855
+ "logqs_logps/rejected": 0.026059061288833618,
1856
+ "loss": 0.1667,
1857
+ "step": 1670
1858
+ },
1859
+ {
1860
+ "epoch": 2.6963587120072225,
1861
+ "grad_norm": 0.453125,
1862
+ "learning_rate": 1.541582291952401e-06,
1863
+ "logps_logqs/chosen": 0.06985867768526077,
1864
+ "logqs/chosen": -0.7848519086837769,
1865
+ "logqs/rejected": -0.9851476550102234,
1866
+ "logqs_logps/rejected": 0.02311038039624691,
1867
+ "loss": 0.1688,
1868
+ "step": 1680
1869
+ },
1870
+ {
1871
+ "epoch": 2.7124084662453605,
1872
+ "grad_norm": 0.4375,
1873
+ "learning_rate": 1.38425055158318e-06,
1874
+ "logps_logqs/chosen": 0.0703495591878891,
1875
+ "logqs/chosen": -0.7814745903015137,
1876
+ "logqs/rejected": -0.9425519108772278,
1877
+ "logqs_logps/rejected": 0.026310011744499207,
1878
+ "loss": 0.1639,
1879
+ "step": 1690
1880
+ },
1881
+ {
1882
+ "epoch": 2.728458220483499,
1883
+ "grad_norm": 0.44921875,
1884
+ "learning_rate": 1.235157097300188e-06,
1885
+ "logps_logqs/chosen": 0.06932912766933441,
1886
+ "logqs/chosen": -0.7783406972885132,
1887
+ "logqs/rejected": -0.9719565510749817,
1888
+ "logqs_logps/rejected": 0.025869470089673996,
1889
+ "loss": 0.1671,
1890
+ "step": 1700
1891
+ },
1892
+ {
1893
+ "epoch": 2.744507974721637,
1894
+ "grad_norm": 0.40625,
1895
+ "learning_rate": 1.0943539399249635e-06,
1896
+ "logps_logqs/chosen": 0.07044648379087448,
1897
+ "logqs/chosen": -0.776070237159729,
1898
+ "logqs/rejected": -0.9304519891738892,
1899
+ "logqs_logps/rejected": 0.02800569497048855,
1900
+ "loss": 0.169,
1901
+ "step": 1710
1902
+ },
1903
+ {
1904
+ "epoch": 2.7605577289597756,
1905
+ "grad_norm": 0.4609375,
1906
+ "learning_rate": 9.618901982328704e-07,
1907
+ "logps_logqs/chosen": 0.06949154287576675,
1908
+ "logqs/chosen": -0.7787348031997681,
1909
+ "logqs/rejected": -0.9538747668266296,
1910
+ "logqs_logps/rejected": 0.027285417541861534,
1911
+ "loss": 0.169,
1912
+ "step": 1720
1913
+ },
1914
+ {
1915
+ "epoch": 2.7766074831979135,
1916
+ "grad_norm": 0.427734375,
1917
+ "learning_rate": 8.378120818181707e-07,
1918
+ "logps_logqs/chosen": 0.07005327194929123,
1919
+ "logqs/chosen": -0.7800394296646118,
1920
+ "logqs/rejected": -0.9388517141342163,
1921
+ "logqs_logps/rejected": 0.024246862158179283,
1922
+ "loss": 0.1638,
1923
+ "step": 1730
1924
+ },
1925
+ {
1926
+ "epoch": 2.792657237436052,
1927
+ "grad_norm": 0.39453125,
1928
+ "learning_rate": 7.221628749739223e-07,
1929
+ "logps_logqs/chosen": 0.07048022001981735,
1930
+ "logqs/chosen": -0.781145453453064,
1931
+ "logqs/rejected": -0.9654680490493774,
1932
+ "logqs_logps/rejected": 0.025187021121382713,
1933
+ "loss": 0.1646,
1934
+ "step": 1740
1935
+ },
1936
+ {
1937
+ "epoch": 2.80870699167419,
1938
+ "grad_norm": 0.41796875,
1939
+ "learning_rate": 6.149829215924025e-07,
1940
+ "logps_logqs/chosen": 0.06936784833669662,
1941
+ "logqs/chosen": -0.7857106328010559,
1942
+ "logqs/rejected": -1.0071794986724854,
1943
+ "logqs_logps/rejected": 0.02525334618985653,
1944
+ "loss": 0.1685,
1945
+ "step": 1750
1946
+ },
1947
+ {
1948
+ "epoch": 2.824756745912328,
1949
+ "grad_norm": 0.396484375,
1950
+ "learning_rate": 5.163096110912368e-07,
1951
+ "logps_logqs/chosen": 0.0710034891963005,
1952
+ "logqs/chosen": -0.7809524536132812,
1953
+ "logqs/rejected": -0.9730299711227417,
1954
+ "logqs_logps/rejected": 0.02650505304336548,
1955
+ "loss": 0.1685,
1956
+ "step": 1760
1957
+ },
1958
+ {
1959
+ "epoch": 2.8408065001504665,
1960
+ "grad_norm": 0.419921875,
1961
+ "learning_rate": 4.261773653702089e-07,
1962
+ "logps_logqs/chosen": 0.07018028199672699,
1963
+ "logqs/chosen": -0.7636402249336243,
1964
+ "logqs/rejected": -0.9362967610359192,
1965
+ "logqs_logps/rejected": 0.02620730921626091,
1966
+ "loss": 0.1674,
1967
+ "step": 1770
1968
+ },
1969
+ {
1970
+ "epoch": 2.856856254388605,
1971
+ "grad_norm": 0.5859375,
1972
+ "learning_rate": 3.4461762680329803e-07,
1973
+ "logps_logqs/chosen": 0.06559871137142181,
1974
+ "logqs/chosen": -0.7775349617004395,
1975
+ "logqs/rejected": -0.9631088376045227,
1976
+ "logqs_logps/rejected": 0.024396821856498718,
1977
+ "loss": 0.1651,
1978
+ "step": 1780
1979
+ },
1980
+ {
1981
+ "epoch": 2.872906008626743,
1982
+ "grad_norm": 0.419921875,
1983
+ "learning_rate": 2.716588472700815e-07,
1984
+ "logps_logqs/chosen": 0.07039310038089752,
1985
+ "logqs/chosen": -0.7576441168785095,
1986
+ "logqs/rejected": -0.9705005884170532,
1987
+ "logqs_logps/rejected": 0.026445040479302406,
1988
+ "loss": 0.1646,
1989
+ "step": 1790
1990
+ },
1991
+ {
1992
+ "epoch": 2.888955762864881,
1993
+ "grad_norm": 0.39453125,
1994
+ "learning_rate": 2.0732647823038243e-07,
1995
+ "logps_logqs/chosen": 0.06968151032924652,
1996
+ "logqs/chosen": -0.7765523791313171,
1997
+ "logqs/rejected": -0.9748941659927368,
1998
+ "logqs_logps/rejected": 0.02464660070836544,
1999
+ "loss": 0.1665,
2000
+ "step": 1800
2001
+ },
2002
+ {
2003
+ "epoch": 2.905005517103019,
2004
+ "grad_norm": 0.458984375,
2005
+ "learning_rate": 1.5164296184560222e-07,
2006
+ "logps_logqs/chosen": 0.06898736953735352,
2007
+ "logqs/chosen": -0.781166672706604,
2008
+ "logqs/rejected": -0.9676758646965027,
2009
+ "logqs_logps/rejected": 0.02475111000239849,
2010
+ "loss": 0.1665,
2011
+ "step": 1810
2012
+ },
2013
+ {
2014
+ "epoch": 2.9210552713411575,
2015
+ "grad_norm": 0.404296875,
2016
+ "learning_rate": 1.0462772314983882e-07,
2017
+ "logps_logqs/chosen": 0.06931523233652115,
2018
+ "logqs/chosen": -0.7759555578231812,
2019
+ "logqs/rejected": -0.9690700769424438,
2020
+ "logqs_logps/rejected": 0.024959508329629898,
2021
+ "loss": 0.1706,
2022
+ "step": 1820
2023
+ },
2024
+ {
2025
+ "epoch": 2.937105025579296,
2026
+ "grad_norm": 0.4609375,
2027
+ "learning_rate": 6.62971632735182e-08,
2028
+ "logps_logqs/chosen": 0.07144445180892944,
2029
+ "logqs/chosen": -0.7924820780754089,
2030
+ "logqs/rejected": -0.9925470352172852,
2031
+ "logqs_logps/rejected": 0.0241483636200428,
2032
+ "loss": 0.1691,
2033
+ "step": 1830
2034
+ },
2035
+ {
2036
+ "epoch": 2.9531547798174342,
2037
+ "grad_norm": 0.435546875,
2038
+ "learning_rate": 3.666465372190453e-08,
2039
+ "logps_logqs/chosen": 0.07032831013202667,
2040
+ "logqs/chosen": -0.7886664271354675,
2041
+ "logqs/rejected": -1.0002458095550537,
2042
+ "logqs_logps/rejected": 0.024760346859693527,
2043
+ "loss": 0.1681,
2044
+ "step": 1840
2045
+ },
2046
+ {
2047
+ "epoch": 2.969204534055572,
2048
+ "grad_norm": 0.431640625,
2049
+ "learning_rate": 1.57405317104925e-08,
2050
+ "logps_logqs/chosen": 0.068773552775383,
2051
+ "logqs/chosen": -0.7885466814041138,
2052
+ "logqs/rejected": -0.9979216456413269,
2053
+ "logqs_logps/rejected": 0.023049544543027878,
2054
+ "loss": 0.1691,
2055
+ "step": 1850
2056
+ },
2057
+ {
2058
+ "epoch": 2.9852542882937105,
2059
+ "grad_norm": 0.427734375,
2060
+ "learning_rate": 3.532096558903075e-09,
2061
+ "logps_logqs/chosen": 0.06932573765516281,
2062
+ "logqs/chosen": -0.7747775912284851,
2063
+ "logqs/rejected": -0.9474186897277832,
2064
+ "logqs_logps/rejected": 0.026311378926038742,
2065
+ "loss": 0.1678,
2066
+ "step": 1860
2067
+ },
2068
+ {
2069
+ "epoch": 2.999699067108035,
2070
+ "step": 1869,
2071
+ "total_flos": 0.0,
2072
+ "train_loss": 0.194109176269121,
2073
+ "train_runtime": 23250.2173,
2074
+ "train_samples_per_second": 10.29,
2075
+ "train_steps_per_second": 0.08
2076
+ }
2077
+ ],
2078
+ "logging_steps": 10,
2079
+ "max_steps": 1869,
2080
+ "num_input_tokens_seen": 0,
2081
+ "num_train_epochs": 3,
2082
+ "save_steps": 500,
2083
+ "stateful_callbacks": {
2084
+ "TrainerControl": {
2085
+ "args": {
2086
+ "should_epoch_stop": false,
2087
+ "should_evaluate": false,
2088
+ "should_log": false,
2089
+ "should_save": true,
2090
+ "should_training_stop": true
2091
+ },
2092
+ "attributes": {}
2093
+ }
2094
+ },
2095
+ "total_flos": 0.0,
2096
+ "train_batch_size": 2,
2097
+ "trial_name": null,
2098
+ "trial_params": null
2099
+ }