Jerry999 committed
Commit e2d07a4 · verified · 1 Parent(s): b4ed115

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +4 -0
  2. checkpoints/math_operations/compositional_full_sft_n_steps_2/chat_template.jinja +4 -0
  3. checkpoints/math_operations/compositional_full_sft_n_steps_2/config.json +71 -0
  4. checkpoints/math_operations/compositional_full_sft_n_steps_2/tokenizer.json +3 -0
  5. checkpoints/math_operations/compositional_full_sft_n_steps_2/tokenizer_config.json +29 -0
  6. checkpoints/math_operations/full_sft_50k_lr5e5/README.md +132 -0
  7. checkpoints/math_operations/full_sft_50k_lr5e5/chat_template.jinja +4 -0
  8. checkpoints/math_operations/full_sft_50k_lr5e5/config.json +71 -0
  9. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/balanced_test_alpaca_converted.jsonl +0 -0
  10. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/balanced_test_alpaca_results.jsonl +0 -0
  11. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/eval_results.csv +12 -0
  12. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/eval_summary.json +19 -0
  13. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_A_test_alpaca_converted.jsonl +0 -0
  14. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_A_test_alpaca_results.jsonl +0 -0
  15. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_B_test_alpaca_converted.jsonl +0 -0
  16. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_B_test_alpaca_results.jsonl +0 -0
  17. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_C_test_alpaca_converted.jsonl +0 -0
  18. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_C_test_alpaca_results.jsonl +0 -0
  19. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_D_test_alpaca_converted.jsonl +0 -0
  20. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_D_test_alpaca_results.jsonl +0 -0
  21. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_E_test_alpaca_converted.jsonl +0 -0
  22. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_E_test_alpaca_results.jsonl +0 -0
  23. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_F_test_alpaca_converted.jsonl +0 -0
  24. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_F_test_alpaca_results.jsonl +0 -0
  25. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_G_test_alpaca_converted.jsonl +0 -0
  26. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_G_test_alpaca_results.jsonl +0 -0
  27. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_H_test_alpaca_converted.jsonl +0 -0
  28. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_H_test_alpaca_results.jsonl +0 -0
  29. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/test_alpaca_converted.jsonl +0 -0
  30. checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/test_alpaca_results.jsonl +0 -0
  31. checkpoints/math_operations/full_sft_50k_lr5e5/generation_config.json +12 -0
  32. checkpoints/math_operations/full_sft_50k_lr5e5/model.safetensors +3 -0
  33. checkpoints/math_operations/full_sft_50k_lr5e5/tokenizer.json +3 -0
  34. checkpoints/math_operations/full_sft_50k_lr5e5/tokenizer_config.json +29 -0
  35. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/README.md +157 -0
  36. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/adapter_config.json +46 -0
  37. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/adapter_model.safetensors +3 -0
  38. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/chat_template.jinja +4 -0
  39. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/config.json +71 -0
  40. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/balanced_test_alpaca_converted.jsonl +0 -0
  41. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/balanced_test_alpaca_results.jsonl +0 -0
  42. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/eval_results.csv +11 -0
  43. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/eval_summary.json +19 -0
  44. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_A_test_alpaca_converted.jsonl +0 -0
  45. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_A_test_alpaca_results.jsonl +0 -0
  46. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_B_test_alpaca_converted.jsonl +0 -0
  47. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_B_test_alpaca_results.jsonl +0 -0
  48. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_C_test_alpaca_converted.jsonl +0 -0
  49. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_C_test_alpaca_results.jsonl +0 -0
  50. checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_D_test_alpaca_converted.jsonl +0 -0
.gitattributes CHANGED
@@ -37,3 +37,7 @@ checkpoints/knowledge/atomic_full_sft_50ep/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoints/knowledge/atomic_full_then_2step_full_sft/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoints/knowledge/atomic_sft_lora_50ep/merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoints/knowledge/atomic_sft_lora_50ep/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/math_operations/compositional_full_sft_n_steps_2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/math_operations/full_sft_50k_lr5e5/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/math_operations/primitive_atomic_balanced_sft_50k/merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/math_operations/primitive_atomic_balanced_sft_50k/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoints/math_operations/compositional_full_sft_n_steps_2/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
+' + message['content'] + '<|im_end|>' + '
+'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
+' }}{% endif %}
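
The four template lines above are the standard ChatML chat template; note that the Jinja string literals contain literal newlines. Below is a minimal rendering sketch, assuming only that `jinja2` is installed; the template string is copied from the file, while the example message is illustrative and not part of the commit:

```python
from jinja2 import Template

# Template text copied from chat_template.jinja; the literal newlines in the
# Jinja string literals are written here as \n escapes.
CHATML = (
    "{% if not add_generation_prompt is defined %}"
    "{% set add_generation_prompt = false %}{% endif %}"
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

messages = [{"role": "user", "content": "What is 12 + 30?"}]  # illustrative turn
print(Template(CHATML).render(messages=messages, add_generation_prompt=True))
# <|im_start|>user
# What is 12 + 30?<|im_end|>
# <|im_start|>assistant
```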
checkpoints/math_operations/compositional_full_sft_n_steps_2/config.json ADDED
@@ -0,0 +1,71 @@
+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 262144,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token_id": 151643,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 5000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
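
The config above pins a 36-layer Qwen3 architecture with full attention in every layer. A minimal sketch for inspecting it with `transformers`; the relative path is illustrative and assumes a local clone of this repo:

```python
from transformers import AutoConfig

# Hypothetical local path to the checkpoint folder from this commit.
cfg = AutoConfig.from_pretrained(
    "checkpoints/math_operations/compositional_full_sft_n_steps_2"
)
print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size, cfg.num_key_value_heads)
# expected: qwen3 36 2560 8
```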
checkpoints/math_operations/compositional_full_sft_n_steps_2/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
+size 11422650
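
The tokenizer.json above is stored as a Git LFS pointer: three lines recording the spec version, the blob's sha256, and its size in bytes. A small helper of my own (not part of the repo) that verifies a downloaded blob against such a pointer:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a fetched file against the oid/size recorded in an LFS pointer."""
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid
```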
checkpoints/math_operations/compositional_full_sft_n_steps_2/tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "is_local": true,
+  "model_max_length": 1010000,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}
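
This tokenizer config sets `<|im_end|>` as EOS and `<|endoftext|>` as pad, which is what batched generation and sample packing rely on. A quick check, assuming a local clone (path illustrative):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "checkpoints/math_operations/compositional_full_sft_n_steps_2"  # hypothetical path
)
print(tok.eos_token, tok.pad_token)  # expected: <|im_end|> <|endoftext|>
```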
checkpoints/math_operations/full_sft_50k_lr5e5/README.md ADDED
@@ -0,0 +1,132 @@
+---
+library_name: transformers
+tags:
+- generated_from_trainer
+datasets:
+- /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_train_alpaca.jsonl
+model-index:
+- name: home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/full_sft_50k_lr5e5
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+
+axolotl version: `0.15.0.dev0`
+```yaml
+# Qwen3-4B full fine-tuning SFT — LR 5e-5
+
+base_model: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/models/Qwen/Qwen3-4B-Instruct-2507
+
+load_in_8bit: false
+load_in_4bit: false
+strict: false
+
+datasets:
+  - path: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_train_alpaca.jsonl
+    type: alpaca
+dataset_prepared_path:
+val_set_size: 0
+chat_template: chatml
+
+test_datasets:
+  - path: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_val_alpaca.jsonl
+    type: alpaca
+
+output_dir: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/full_sft_50k_lr5e5
+
+sequence_len: 2048
+sample_packing: true
+eval_sample_packing: true
+
+gradient_accumulation_steps: 8
+micro_batch_size: 1
+num_epochs: 3
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 5e-5
+
+bf16: auto
+tf32: true
+
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+
+logging_steps: 10
+flash_attention: true
+warmup_ratio: 0.1
+evals_per_epoch: 2
+saves_per_epoch: 1
+save_total_limit: 1
+weight_decay: 0.01
+
+wandb_project: math_operations_sft
+wandb_name: qwen3-4b-full-sft-50k-lr5e5
+wandb_log_model: "false"
+
+special_tokens:
+
+```
+
+</details><br>
+
+# home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/full_sft_50k_lr5e5
+
+This model was fine-tuned from /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/models/Qwen/Qwen3-4B-Instruct-2507 on the /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_train_alpaca.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0001
+- Ppl: 1.0001
+- Memory/max Active (gib): 33.95
+- Memory/max Allocated (gib): 33.95
+- Memory/device Reserved (gib): 35.97
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 8
+- optimizer: adamw_torch_fused with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 312
+- training_steps: 3123
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss | Ppl | Active (gib) | Allocated (gib) | Reserved (gib) |
+|:-------------:|:------:|:----:|:---------------:|:------:|:------------:|:---------------:|:--------------:|
+| No log | 0 | 0 | 0.8898 | 2.4345 | 10.41 | 10.41 | 10.64 |
+| 0.0029 | 0.5002 | 521 | 0.0023 | 1.0023 | 33.97 | 33.97 | 36.5 |
+| 0.0003 | 1.0 | 1042 | 0.0005 | 1.0005 | 33.95 | 33.95 | 35.97 |
+| 0.0003 | 1.5002 | 1563 | 0.0003 | 1.0003 | 33.95 | 33.95 | 35.97 |
+| 0.0002 | 2.0 | 2084 | 0.0001 | 1.0001 | 33.95 | 33.95 | 35.97 |
+| 0.0001 | 2.5002 | 2605 | 0.0001 | 1.0001 | 33.95 | 33.95 | 35.97 |
+
+
+### Framework versions
+
+- Transformers 5.0.0
+- Pytorch 2.8.0+cu128
+- Datasets 4.5.0
+- Tokenizers 0.22.2
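
The derived quantities in this card are internally consistent; a quick sanity check (mine, not from the card) of the batch and warmup arithmetic:

```python
# Effective batch = micro_batch_size * gradient_accumulation_steps,
# warmup steps = warmup_ratio * training_steps (values from the card above).
micro_batch_size = 1
gradient_accumulation_steps = 8
training_steps = 3123
warmup_ratio = 0.1

print(micro_batch_size * gradient_accumulation_steps)  # 8   == total_train_batch_size
print(int(warmup_ratio * training_steps))              # 312 == lr_scheduler_warmup_steps
```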
checkpoints/math_operations/full_sft_50k_lr5e5/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
+' + message['content'] + '<|im_end|>' + '
+'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
+' }}{% endif %}
checkpoints/math_operations/full_sft_50k_lr5e5/config.json ADDED
@@ -0,0 +1,71 @@
+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 262144,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token_id": 151643,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 5000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/balanced_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/balanced_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/eval_results.csv ADDED
@@ -0,0 +1,12 @@
+category,filename,total,correct,accuracy,format_found,format_accuracy,errors_count
+math_operations,balanced_test_alpaca_results,200,198,99.00,200,100.00,2
+math_operations,balanced_test_alpaca_results,200,47,23.50,200,100.00,153
+math_operations,test_alpaca_results,200,0,0.00,196,98.00,200
+math_operations,op_A_test_alpaca_results,200,0,0.00,161,80.50,200
+math_operations,op_B_test_alpaca_results,200,2,1.00,190,95.00,198
+math_operations,op_C_test_alpaca_results,200,0,0.00,198,99.00,200
+math_operations,op_D_test_alpaca_results,200,2,1.00,174,87.00,198
+math_operations,op_E_test_alpaca_results,200,2,1.00,200,100.00,198
+math_operations,op_F_test_alpaca_results,200,1,0.50,198,99.00,199
+math_operations,op_G_test_alpaca_results,200,0,0.00,200,100.00,200
+math_operations,op_H_test_alpaca_results,200,0,0.00,200,100.00,200
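
A minimal sketch (filename assumed local) that loads the CSV above and ranks the per-operation splits by accuracy:

```python
import csv

with open("eval_results.csv") as f:  # hypothetical local copy of the file above
    rows = list(csv.DictReader(f))

op_rows = [r for r in rows if r["filename"].startswith("op_")]
for r in sorted(op_rows, key=lambda r: float(r["accuracy"]), reverse=True):
    print(f"{r['filename']}: {r['accuracy']}% ({r['correct']}/{r['total']})")
```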
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/eval_summary.json ADDED
@@ -0,0 +1,19 @@
+{
+  "overall": {
+    "total": 200,
+    "correct": 0,
+    "accuracy": 0.0,
+    "format_found": 200,
+    "format_accuracy": 100.0
+  },
+  "per_operation": {
+    "A": {
+      "total": 200,
+      "correct": 0,
+      "accuracy": 0.0,
+      "format_found": 200
+    }
+  },
+  "n_errors": 200,
+  "results_file": "/home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_H_test_alpaca_results.jsonl"
+}
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_A_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_A_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_B_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_B_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_C_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_C_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_D_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_D_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_E_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_E_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_F_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_F_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_G_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_G_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_H_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/op_H_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/eval_results/test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/full_sft_50k_lr5e5/generation_config.json ADDED
@@ -0,0 +1,12 @@
+{
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "5.0.0"
+}
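
These sampling defaults (temperature 0.7, top-k 20, top-p 0.8, with both the ChatML and endoftext ids as EOS) are picked up automatically by `generate`. A minimal usage sketch; the local path and the prompt are illustrative, not from the repo:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

path = "checkpoints/math_operations/full_sft_50k_lr5e5"  # hypothetical local path
tok = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path)

# Build a ChatML prompt using the chat template shipped in this folder.
prompt = tok.apply_chat_template(
    [{"role": "user", "content": "Compute 12 + 30."}],  # illustrative prompt
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tok(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=256)  # sampling per generation_config.json
print(tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```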
checkpoints/math_operations/full_sft_50k_lr5e5/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09d049022c056ec8956018a219d3014e48134d7acc9a4e2303db707721354ac9
+size 8044982080
checkpoints/math_operations/full_sft_50k_lr5e5/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
+size 11422650
checkpoints/math_operations/full_sft_50k_lr5e5/tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "is_local": true,
+  "model_max_length": 1010000,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null,
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ]
+}
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/README.md ADDED
@@ -0,0 +1,157 @@
+---
+library_name: peft
+tags:
+- axolotl
+- base_model:adapter:/home/jiaruil5/math_rl/mix_teachers/r3lit_rl/models/Qwen/Qwen3-4B-Instruct-2507
+- lora
+- transformers
+datasets:
+- /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_train_alpaca.jsonl
+pipeline_tag: text-generation
+base_model: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/models/Qwen/Qwen3-4B-Instruct-2507
+model-index:
+- name: home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/primitive_atomic_balanced_sft_50k
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+
+axolotl version: `0.15.0.dev0`
+```yaml
+# Qwen3-4B LoRA SFT on primitive_atomic_balanced_sft_50k (A-H merged, CoT outputs)
+
+base_model: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/models/Qwen/Qwen3-4B-Instruct-2507
+
+# Model loading (full precision, no quantization)
+load_in_8bit: false
+load_in_4bit: false
+strict: false
+
+# Training dataset (50000 examples, 8 ops balanced @ 6250 each, CoT outputs)
+datasets:
+  - path: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_train_alpaca.jsonl
+    type: alpaca
+dataset_prepared_path:
+val_set_size: 0
+chat_template: chatml
+
+# Validation dataset (200 examples)
+test_datasets:
+  - path: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_val_alpaca.jsonl
+    type: alpaca
+
+output_dir: /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/primitive_atomic_balanced_sft_50k
+
+# Sequence settings
+sequence_len: 2048
+sample_packing: true
+eval_sample_packing: true
+
+# LoRA configuration (full LoRA, no quantization)
+adapter: lora
+lora_r: 32
+lora_alpha: 64
+lora_dropout: 0.05
+lora_target_linear: true
+
+# Training hyperparameters
+gradient_accumulation_steps: 4
+micro_batch_size: 2
+num_epochs: 5
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+# Precision
+bf16: auto
+tf32: true
+
+# Memory optimization
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+
+# Logging and saving
+logging_steps: 10
+flash_attention: true
+warmup_ratio: 0.1
+evals_per_epoch: 2
+saves_per_epoch: 1
+weight_decay: 0.01
+
+# Wandb logging
+wandb_project: math_operations_sft
+wandb_name: qwen3-4b-primitive-atomic-balanced-lora-sft-50k
+wandb_log_model: "false"
+
+special_tokens:
+
+```
+
+</details><br>
+
+# home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/primitive_atomic_balanced_sft_50k
+
+This model was fine-tuned with LoRA from /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/models/Qwen/Qwen3-4B-Instruct-2507 on the /home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/data/math_operations/primitive_atomic_balanced_sft_50k/balanced_train_alpaca.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0000
+- Ppl: 1.0000
+- Memory/max Active (gib): 16.23
+- Memory/max Allocated (gib): 16.23
+- Memory/device Reserved (gib): 20.01
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
+- optimizer: adamw_torch_fused with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 515
+- training_steps: 5155
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss | Ppl | Active (gib) | Allocated (gib) | Reserved (gib) |
+|:-------------:|:------:|:----:|:---------------:|:------:|:------------:|:---------------:|:--------------:|
+| No log | 0 | 0 | 0.8898 | 2.4348 | 13.69 | 13.69 | 13.84 |
+| 0.0009 | 0.5004 | 516 | 0.0009 | 1.0009 | 16.23 | 16.23 | 18.85 |
+| 0.0003 | 1.0019 | 1032 | 0.0005 | 1.0005 | 16.73 | 16.73 | 20.01 |
+| 0.0010 | 1.5023 | 1548 | 0.0007 | 1.0007 | 16.23 | 16.23 | 20.01 |
+| 0.0004 | 2.0039 | 2064 | 0.0003 | 1.0003 | 16.73 | 16.73 | 20.01 |
+| 0.0002 | 2.5042 | 2580 | 0.0001 | 1.0001 | 14.2 | 14.2 | 20.01 |
+| 0.0003 | 3.0039 | 3096 | 0.0001 | 1.0001 | 16.73 | 16.73 | 20.01 |
+| 0.0001 | 3.5042 | 3612 | 0.0002 | 1.0002 | 16.23 | 16.23 | 20.01 |
+| 0.0000 | 4.0058 | 4128 | 0.0000 | 1.0000 | 16.73 | 16.73 | 20.01 |
+| 0.0000 | 4.5062 | 4644 | 0.0000 | 1.0000 | 16.23 | 16.23 | 20.01 |
+
+
+### Framework versions
+
+- PEFT 0.18.1
+- Transformers 5.0.0
+- Pytorch 2.8.0+cu128
+- Datasets 4.5.0
+- Tokenizers 0.22.2
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/adapter_config.json ADDED
@@ -0,0 +1,46 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "/home/jiaruil5/math_rl/mix_teachers/r3lit_rl/models/Qwen/Qwen3-4B-Instruct-2507",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "gate_proj",
+    "v_proj",
+    "o_proj",
+    "k_proj",
+    "q_proj",
+    "up_proj"
+  ],
+  "target_parameters": [],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
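
The adapter targets every linear projection (q/k/v/o plus the MLP gate/up/down) with r=32 and alpha=64. A minimal sketch for attaching it with PEFT; both paths are illustrative and assume local copies:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "models/Qwen/Qwen3-4B-Instruct-2507"  # hypothetical local base model path
)
model = PeftModel.from_pretrained(
    base,
    "checkpoints/math_operations/primitive_atomic_balanced_sft_50k",  # this adapter
)
model = model.merge_and_unload()  # optionally fold the LoRA deltas into the base weights
```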
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:000255b51ab1bcb9bd0dba9e94dbee95aba886cc7127c3bb7beadc80bf8e22b4
+size 264308896
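
The ~264 MB pointer size matches that adapter shape. A back-of-the-envelope check of my own, using the dimensions from config.json (hidden 2560, 32 query / 8 KV heads of dim 128, intermediate 9728, 36 layers); the factor of 4 suggests the adapter tensors are stored in fp32:

```python
r = 32
h, inter = 2560, 9728
q_out, kv_out = 32 * 128, 8 * 128  # query and key/value projection widths

# LoRA adds r*(d_in + d_out) parameters per targeted linear layer.
per_layer = (
    r * (h + q_out)         # q_proj
    + 2 * r * (h + kv_out)  # k_proj, v_proj
    + r * (q_out + h)       # o_proj
    + 2 * r * (h + inter)   # gate_proj, up_proj
    + r * (inter + h)       # down_proj
)
total = 36 * per_layer
print(total)      # 66060288 (~66M) LoRA parameters
print(total * 4)  # 264241152 bytes in fp32, close to the 264308896-byte file
```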
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
+' + message['content'] + '<|im_end|>' + '
+'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
+' }}{% endif %}
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/config.json ADDED
@@ -0,0 +1,71 @@
+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 262144,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token_id": null,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 5000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/balanced_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/balanced_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/eval_results.csv ADDED
@@ -0,0 +1,11 @@
+category,filename,total,correct,accuracy,format_found,format_accuracy,errors_count
+math_operations,balanced_test_alpaca_results,200,200,100.00,200,100.00,0
+math_operations,test_alpaca_results,200,1,0.50,199,99.50,199
+math_operations,op_A_test_alpaca_results,200,8,4.00,200,100.00,192
+math_operations,op_B_test_alpaca_results,200,1,0.50,200,100.00,199
+math_operations,op_C_test_alpaca_results,200,1,0.50,200,100.00,199
+math_operations,op_D_test_alpaca_results,200,0,0.00,200,100.00,200
+math_operations,op_E_test_alpaca_results,200,0,0.00,200,100.00,200
+math_operations,op_F_test_alpaca_results,200,6,3.00,200,100.00,194
+math_operations,op_G_test_alpaca_results,200,1,0.50,200,100.00,199
+math_operations,op_H_test_alpaca_results,200,0,0.00,198,99.00,200
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/eval_summary.json ADDED
@@ -0,0 +1,19 @@
+{
+  "overall": {
+    "total": 200,
+    "correct": 0,
+    "accuracy": 0.0,
+    "format_found": 198,
+    "format_accuracy": 99.0
+  },
+  "per_operation": {
+    "A": {
+      "total": 200,
+      "correct": 0,
+      "accuracy": 0.0,
+      "format_found": 198
+    }
+  },
+  "n_errors": 200,
+  "results_file": "/home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_H_test_alpaca_results.jsonl"
+}
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_A_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_A_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_B_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_B_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_C_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_C_test_alpaca_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/primitive_atomic_balanced_sft_50k/eval_results/op_D_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff