VoCuc commited on
Commit
ca469fd
·
verified ·
1 Parent(s): 1b6acf2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. distillm-2-master/outputs/gen_0.5.log +1 -0
  2. distillm-2-master/outputs/gen_1.5.log +0 -0
  3. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/config.json +39 -0
  4. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/generation_config.json +6 -0
  5. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/merges.txt +0 -0
  6. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/model.safetensors +3 -0
  7. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/special_tokens_map.json +6 -0
  8. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer.json +0 -0
  9. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer_config.json +20 -0
  10. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/trainer_state.json +0 -0
  11. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/training_args.bin +3 -0
  12. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/vocab.json +0 -0
  13. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/config.json +39 -0
  14. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/generation_config.json +6 -0
  15. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/merges.txt +0 -0
  16. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/model.safetensors +3 -0
  17. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/special_tokens_map.json +6 -0
  18. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer.json +0 -0
  19. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer_config.json +20 -0
  20. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/trainer_state.json +0 -0
  21. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/training_args.bin +3 -0
  22. distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/vocab.json +0 -0
  23. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/config.json +39 -0
  24. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/generation_config.json +6 -0
  25. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/merges.txt +0 -0
  26. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/model.safetensors +3 -0
  27. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/special_tokens_map.json +6 -0
  28. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer.json +0 -0
  29. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer_config.json +20 -0
  30. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/trainer_state.json +0 -0
  31. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/training_args.bin +3 -0
  32. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/vocab.json +0 -0
  33. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/config.json +39 -0
  34. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/generation_config.json +6 -0
  35. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/merges.txt +0 -0
  36. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/model.safetensors +3 -0
  37. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/special_tokens_map.json +6 -0
  38. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer.json +0 -0
  39. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer_config.json +20 -0
  40. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/trainer_state.json +0 -0
  41. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/training_args.bin +3 -0
  42. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/vocab.json +0 -0
  43. distillm-2-master/outputs/gpt2-0.1b-span-distillm2/training_args.bin +3 -0
  44. distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/README.md +202 -0
  45. distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_config.json +33 -0
  46. distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_model.safetensors +3 -0
  47. distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/merges.txt +0 -0
  48. distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/special_tokens_map.json +30 -0
  49. distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer.json +0 -0
  50. distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer_config.json +30 -0
distillm-2-master/outputs/gen_0.5.log ADDED
@@ -0,0 +1 @@
 
0
  0%| | 0/1563 [00:00<?, ?it/s]
1
  0%| | 1/1563 [00:08<3:47:56, 8.76s/it]
2
  0%| | 2/1563 [00:16<3:29:49, 8.06s/it]
3
  0%| | 3/1563 [00:24<3:25:04, 7.89s/it]
4
  0%| | 4/1563 [00:31<3:21:31, 7.76s/it]
5
  0%| | 5/1563 [00:39<3:19:22, 7.68s/it]
6
  0%| | 6/1563 [00:46<3:18:29, 7.65s/it]
7
  0%| | 7/1563 [00:54<3:17:49, 7.63s/it]
8
  1%| | 8/1563 [01:01<3:18:04, 7.64s/it]
9
  1%| | 9/1563 [01:09<3:17:31, 7.63s/it]
10
  1%| | 10/1563 [01:17<3:17:01, 7.61s/it]
11
  1%| | 11/1563 [01:24<3:18:14, 7.66s/it]
12
  1%| | 12/1563 [01:32<3:17:33, 7.64s/it]
13
  1%| | 13/1563 [01:40<3:17:22, 7.64s/it]
14
  1%| | 14/1563 [01:47<3:17:08, 7.64s/it]
15
  1%| | 15/1563 [01:55<3:17:07, 7.64s/it]
16
  1%| | 16/1563 [02:02<3:15:56, 7.60s/it]
17
  1%| | 17/1563 [02:10<3:15:18, 7.58s/it]
18
  1%| | 18/1563 [02:17<3:14:37, 7.56s/it]
19
  1%| | 19/1563 [02:25<3:14:16, 7.55s/it]
20
  1%|▏ | 20/1563 [02:32<3:13:37, 7.53s/it]
21
  1%|▏ | 21/1563 [02:40<3:13:04, 7.51s/it]
22
  1%|▏ | 22/1563 [02:47<3:12:41, 7.50s/it]
23
  1%|▏ | 23/1563 [02:55<3:12:21, 7.49s/it]
24
  2%|▏ | 24/1563 [03:02<3:12:08, 7.49s/it]
25
  2%|▏ | 25/1563 [03:10<3:12:03, 7.49s/it]
26
  2%|▏ | 26/1563 [03:17<3:12:06, 7.50s/it]
27
  2%|▏ | 27/1563 [03:25<3:11:44, 7.49s/it]
28
  2%|▏ | 28/1563 [03:32<3:11:49, 7.50s/it]
29
  2%|▏ | 29/1563 [03:40<3:11:29, 7.49s/it]
30
  2%|▏ | 30/1563 [03:47<3:12:05, 7.52s/it]
31
  2%|▏ | 31/1563 [03:55<3:11:48, 7.51s/it]
32
  2%|▏ | 32/1563 [04:02<3:11:47, 7.52s/it]
33
  2%|▏ | 33/1563 [04:10<3:12:07, 7.53s/it]
34
  2%|▏ | 34/1563 [04:18<3:11:56, 7.53s/it]
35
  2%|▏ | 35/1563 [04:25<3:13:25, 7.59s/it]
36
  2%|▏ | 36/1563 [04:33<3:12:44, 7.57s/it]
37
  2%|▏ | 37/1563 [04:40<3:11:43, 7.54s/it]
38
  2%|▏ | 38/1563 [04:48<3:12:57, 7.59s/it]
39
  2%|▏ | 39/1563 [04:56<3:12:35, 7.58s/it]
40
  3%|▎ | 40/1563 [05:03<3:12:00, 7.56s/it]
41
  3%|▎ | 41/1563 [05:11<3:10:58, 7.53s/it]
42
  3%|▎ | 42/1563 [05:18<3:10:18, 7.51s/it]
43
  3%|▎ | 43/1563 [05:25<3:10:03, 7.50s/it]
44
  3%|▎ | 44/1563 [05:33<3:10:16, 7.52s/it]
45
  3%|▎ | 45/1563 [05:41<3:10:51, 7.54s/it]
46
  3%|▎ | 46/1563 [05:48<3:10:04, 7.52s/it]
47
  3%|▎ | 47/1563 [05:56<3:09:33, 7.50s/it]
48
  3%|▎ | 48/1563 [06:03<3:09:09, 7.49s/it]
49
  3%|▎ | 49/1563 [06:11<3:09:18, 7.50s/it]
50
  3%|▎ | 50/1563 [06:18<3:08:52, 7.49s/it]
51
  3%|▎ | 51/1563 [06:26<3:09:22, 7.51s/it]
52
  3%|▎ | 52/1563 [06:33<3:08:50, 7.50s/it]
53
  3%|▎ | 53/1563 [06:41<3:10:13, 7.56s/it]
54
  3%|▎ | 54/1563 [06:48<3:09:17, 7.53s/it]
55
  4%|▎ | 55/1563 [06:56<3:09:26, 7.54s/it]
56
  4%|▎ | 56/1563 [07:03<3:08:40, 7.51s/it]
57
  4%|▎ | 57/1563 [07:11<3:08:00, 7.49s/it]
58
  4%|▎ | 58/1563 [07:18<3:07:22, 7.47s/it]
59
  4%|▍ | 59/1563 [07:26<3:07:27, 7.48s/it]
60
  4%|▍ | 60/1563 [07:33<3:07:09, 7.47s/it]
61
  4%|▍ | 61/1563 [07:40<3:06:46, 7.46s/it]
62
  4%|▍ | 62/1563 [07:48<3:06:32, 7.46s/it]
63
  4%|▍ | 63/1563 [07:55<3:06:54, 7.48s/it]
64
  4%|▍ | 64/1563 [08:03<3:06:41, 7.47s/it]
65
  4%|▍ | 65/1563 [08:10<3:06:17, 7.46s/it]
66
  4%|▍ | 66/1563 [08:18<3:05:56, 7.45s/it]
67
  4%|▍ | 67/1563 [08:25<3:05:46, 7.45s/it]
68
  4%|▍ | 68/1563 [08:33<3:05:45, 7.46s/it]
69
  4%|▍ | 69/1563 [08:40<3:05:31, 7.45s/it]
70
  4%|▍ | 70/1563 [08:48<3:05:25, 7.45s/it]
71
  5%|▍ | 71/1563 [08:55<3:05:24, 7.46s/it]
72
  5%|▍ | 72/1563 [09:03<3:05:26, 7.46s/it]
73
  5%|▍ | 73/1563 [09:10<3:05:29, 7.47s/it]
74
  5%|▍ | 74/1563 [09:17<3:05:30, 7.48s/it]
75
  5%|▍ | 75/1563 [09:25<3:05:22, 7.47s/it]
76
  5%|▍ | 76/1563 [09:32<3:05:08, 7.47s/it]
77
  5%|▍ | 77/1563 [09:40<3:05:12, 7.48s/it]
78
  5%|▍ | 78/1563 [09:47<3:05:02, 7.48s/it]
79
  5%|▌ | 79/1563 [09:55<3:04:53, 7.48s/it]
80
  5%|▌ | 80/1563 [10:02<3:04:35, 7.47s/it]
81
  5%|▌ | 81/1563 [10:10<3:04:30, 7.47s/it]
82
  5%|▌ | 82/1563 [10:17<3:04:14, 7.46s/it]
83
  5%|▌ | 83/1563 [10:25<3:04:00, 7.46s/it]
84
  5%|▌ | 84/1563 [10:32<3:04:24, 7.48s/it]
85
  5%|▌ | 85/1563 [10:40<3:04:03, 7.47s/it]
86
  6%|▌ | 86/1563 [10:47<3:03:51, 7.47s/it]
87
  6%|▌ | 87/1563 [10:55<3:03:53, 7.48s/it]
88
  6%|▌ | 88/1563 [11:02<3:03:29, 7.46s/it]
89
  6%|▌ | 89/1563 [11:10<3:03:15, 7.46s/it]
90
  6%|▌ | 90/1563 [11:17<3:03:14, 7.46s/it]
91
  6%|▌ | 91/1563 [11:24<3:02:53, 7.45s/it]
92
  6%|▌ | 92/1563 [11:32<3:02:55, 7.46s/it]
93
  6%|▌ | 93/1563 [11:39<3:02:44, 7.46s/it]
94
  6%|▌ | 94/1563 [11:47<3:02:32, 7.46s/it]
95
  6%|▌ | 95/1563 [11:54<3:02:21, 7.45s/it]
96
  6%|▌ | 96/1563 [12:02<3:02:10, 7.45s/it]
97
  6%|▌ | 97/1563 [12:09<3:02:27, 7.47s/it]
98
  6%|▋ | 98/1563 [12:17<3:02:13, 7.46s/it]
99
  6%|▋ | 99/1563 [12:24<3:02:29, 7.48s/it]
100
  6%|▋ | 100/1563 [12:32<3:02:14, 7.47s/it]
101
  6%|▋ | 101/1563 [12:39<3:01:43, 7.46s/it]
102
  7%|▋ | 102/1563 [12:47<3:01:33, 7.46s/it]
103
  7%|▋ | 103/1563 [12:54<3:01:13, 7.45s/it]
104
  7%|▋ | 104/1563 [13:01<3:00:57, 7.44s/it]
105
  7%|▋ | 105/1563 [13:09<3:01:25, 7.47s/it]
106
  7%|▋ | 106/1563 [13:16<3:01:16, 7.46s/it]
107
  7%|▋ | 107/1563 [13:24<3:01:08, 7.46s/it]
108
  7%|▋ | 108/1563 [13:31<3:00:53, 7.46s/it]
109
  7%|▋ | 109/1563 [13:39<3:00:38, 7.45s/it]
110
  7%|▋ | 110/1563 [13:46<3:00:47, 7.47s/it]
111
  7%|▋ | 111/1563 [13:54<3:00:31, 7.46s/it]
112
  7%|▋ | 112/1563 [14:01<3:00:41, 7.47s/it]
113
  7%|▋ | 113/1563 [14:09<3:00:34, 7.47s/it]
114
  7%|▋ | 114/1563 [14:16<3:00:19, 7.47s/it]
115
  7%|▋ | 115/1563 [14:24<3:00:35, 7.48s/it]
116
  7%|▋ | 116/1563 [14:31<3:00:16, 7.47s/it]
117
  7%|▋ | 117/1563 [14:38<3:00:02, 7.47s/it]
118
  8%|▊ | 118/1563 [14:46<2:59:46, 7.46s/it]
119
  8%|▊ | 119/1563 [14:53<2:59:42, 7.47s/it]
120
  8%|▊ | 120/1563 [15:01<2:59:24, 7.46s/it]
121
  8%|▊ | 121/1563 [15:08<2:59:17, 7.46s/it]
122
  8%|▊ | 122/1563 [15:16<2:59:04, 7.46s/it]
123
  8%|▊ | 123/1563 [15:23<2:59:03, 7.46s/it]
124
  8%|▊ | 124/1563 [15:31<2:59:29, 7.48s/it]
125
  8%|▊ | 125/1563 [15:38<2:59:18, 7.48s/it]
126
  8%|▊ | 126/1563 [15:46<2:58:59, 7.47s/it]
127
  8%|▊ | 127/1563 [15:53<2:58:52, 7.47s/it]
128
  8%|▊ | 128/1563 [16:01<2:59:40, 7.51s/it]distillm-2-master/scripts/math/gen_0.5.sh: line 11: 88855 Killed python distillm-2-master/generate/generate.py --model Qwen/Qwen2.5-0.5B --output_dir data/dpo/Qwen/Qwen2.5-0.5B/ --batch_size 32 --split train
 
1
+
2
  0%| | 0/1563 [00:00<?, ?it/s]
3
  0%| | 1/1563 [00:08<3:47:56, 8.76s/it]
4
  0%| | 2/1563 [00:16<3:29:49, 8.06s/it]
5
  0%| | 3/1563 [00:24<3:25:04, 7.89s/it]
6
  0%| | 4/1563 [00:31<3:21:31, 7.76s/it]
7
  0%| | 5/1563 [00:39<3:19:22, 7.68s/it]
8
  0%| | 6/1563 [00:46<3:18:29, 7.65s/it]
9
  0%| | 7/1563 [00:54<3:17:49, 7.63s/it]
10
  1%| | 8/1563 [01:01<3:18:04, 7.64s/it]
11
  1%| | 9/1563 [01:09<3:17:31, 7.63s/it]
12
  1%| | 10/1563 [01:17<3:17:01, 7.61s/it]
13
  1%| | 11/1563 [01:24<3:18:14, 7.66s/it]
14
  1%| | 12/1563 [01:32<3:17:33, 7.64s/it]
15
  1%| | 13/1563 [01:40<3:17:22, 7.64s/it]
16
  1%| | 14/1563 [01:47<3:17:08, 7.64s/it]
17
  1%| | 15/1563 [01:55<3:17:07, 7.64s/it]
18
  1%| | 16/1563 [02:02<3:15:56, 7.60s/it]
19
  1%| | 17/1563 [02:10<3:15:18, 7.58s/it]
20
  1%| | 18/1563 [02:17<3:14:37, 7.56s/it]
21
  1%| | 19/1563 [02:25<3:14:16, 7.55s/it]
22
  1%|▏ | 20/1563 [02:32<3:13:37, 7.53s/it]
23
  1%|▏ | 21/1563 [02:40<3:13:04, 7.51s/it]
24
  1%|▏ | 22/1563 [02:47<3:12:41, 7.50s/it]
25
  1%|▏ | 23/1563 [02:55<3:12:21, 7.49s/it]
26
  2%|▏ | 24/1563 [03:02<3:12:08, 7.49s/it]
27
  2%|▏ | 25/1563 [03:10<3:12:03, 7.49s/it]
28
  2%|▏ | 26/1563 [03:17<3:12:06, 7.50s/it]
29
  2%|▏ | 27/1563 [03:25<3:11:44, 7.49s/it]
30
  2%|▏ | 28/1563 [03:32<3:11:49, 7.50s/it]
31
  2%|▏ | 29/1563 [03:40<3:11:29, 7.49s/it]
32
  2%|▏ | 30/1563 [03:47<3:12:05, 7.52s/it]
33
  2%|▏ | 31/1563 [03:55<3:11:48, 7.51s/it]
34
  2%|▏ | 32/1563 [04:02<3:11:47, 7.52s/it]
35
  2%|▏ | 33/1563 [04:10<3:12:07, 7.53s/it]
36
  2%|▏ | 34/1563 [04:18<3:11:56, 7.53s/it]
37
  2%|▏ | 35/1563 [04:25<3:13:25, 7.59s/it]
38
  2%|▏ | 36/1563 [04:33<3:12:44, 7.57s/it]
39
  2%|▏ | 37/1563 [04:40<3:11:43, 7.54s/it]
40
  2%|▏ | 38/1563 [04:48<3:12:57, 7.59s/it]
41
  2%|▏ | 39/1563 [04:56<3:12:35, 7.58s/it]
42
  3%|▎ | 40/1563 [05:03<3:12:00, 7.56s/it]
43
  3%|▎ | 41/1563 [05:11<3:10:58, 7.53s/it]
44
  3%|▎ | 42/1563 [05:18<3:10:18, 7.51s/it]
45
  3%|▎ | 43/1563 [05:25<3:10:03, 7.50s/it]
46
  3%|▎ | 44/1563 [05:33<3:10:16, 7.52s/it]
47
  3%|▎ | 45/1563 [05:41<3:10:51, 7.54s/it]
48
  3%|▎ | 46/1563 [05:48<3:10:04, 7.52s/it]
49
  3%|▎ | 47/1563 [05:56<3:09:33, 7.50s/it]
50
  3%|▎ | 48/1563 [06:03<3:09:09, 7.49s/it]
51
  3%|▎ | 49/1563 [06:11<3:09:18, 7.50s/it]
52
  3%|▎ | 50/1563 [06:18<3:08:52, 7.49s/it]
53
  3%|▎ | 51/1563 [06:26<3:09:22, 7.51s/it]
54
  3%|▎ | 52/1563 [06:33<3:08:50, 7.50s/it]
55
  3%|▎ | 53/1563 [06:41<3:10:13, 7.56s/it]
56
  3%|▎ | 54/1563 [06:48<3:09:17, 7.53s/it]
57
  4%|▎ | 55/1563 [06:56<3:09:26, 7.54s/it]
58
  4%|▎ | 56/1563 [07:03<3:08:40, 7.51s/it]
59
  4%|▎ | 57/1563 [07:11<3:08:00, 7.49s/it]
60
  4%|▎ | 58/1563 [07:18<3:07:22, 7.47s/it]
61
  4%|▍ | 59/1563 [07:26<3:07:27, 7.48s/it]
62
  4%|▍ | 60/1563 [07:33<3:07:09, 7.47s/it]
63
  4%|▍ | 61/1563 [07:40<3:06:46, 7.46s/it]
64
  4%|▍ | 62/1563 [07:48<3:06:32, 7.46s/it]
65
  4%|▍ | 63/1563 [07:55<3:06:54, 7.48s/it]
66
  4%|▍ | 64/1563 [08:03<3:06:41, 7.47s/it]
67
  4%|▍ | 65/1563 [08:10<3:06:17, 7.46s/it]
68
  4%|▍ | 66/1563 [08:18<3:05:56, 7.45s/it]
69
  4%|▍ | 67/1563 [08:25<3:05:46, 7.45s/it]
70
  4%|▍ | 68/1563 [08:33<3:05:45, 7.46s/it]
71
  4%|▍ | 69/1563 [08:40<3:05:31, 7.45s/it]
72
  4%|▍ | 70/1563 [08:48<3:05:25, 7.45s/it]
73
  5%|▍ | 71/1563 [08:55<3:05:24, 7.46s/it]
74
  5%|▍ | 72/1563 [09:03<3:05:26, 7.46s/it]
75
  5%|▍ | 73/1563 [09:10<3:05:29, 7.47s/it]
76
  5%|▍ | 74/1563 [09:17<3:05:30, 7.48s/it]
77
  5%|▍ | 75/1563 [09:25<3:05:22, 7.47s/it]
78
  5%|▍ | 76/1563 [09:32<3:05:08, 7.47s/it]
79
  5%|▍ | 77/1563 [09:40<3:05:12, 7.48s/it]
80
  5%|▍ | 78/1563 [09:47<3:05:02, 7.48s/it]
81
  5%|▌ | 79/1563 [09:55<3:04:53, 7.48s/it]
82
  5%|▌ | 80/1563 [10:02<3:04:35, 7.47s/it]
83
  5%|▌ | 81/1563 [10:10<3:04:30, 7.47s/it]
84
  5%|▌ | 82/1563 [10:17<3:04:14, 7.46s/it]
85
  5%|▌ | 83/1563 [10:25<3:04:00, 7.46s/it]
86
  5%|▌ | 84/1563 [10:32<3:04:24, 7.48s/it]
87
  5%|▌ | 85/1563 [10:40<3:04:03, 7.47s/it]
88
  6%|▌ | 86/1563 [10:47<3:03:51, 7.47s/it]
89
  6%|▌ | 87/1563 [10:55<3:03:53, 7.48s/it]
90
  6%|▌ | 88/1563 [11:02<3:03:29, 7.46s/it]
91
  6%|▌ | 89/1563 [11:10<3:03:15, 7.46s/it]
92
  6%|▌ | 90/1563 [11:17<3:03:14, 7.46s/it]
93
  6%|▌ | 91/1563 [11:24<3:02:53, 7.45s/it]
94
  6%|▌ | 92/1563 [11:32<3:02:55, 7.46s/it]
95
  6%|▌ | 93/1563 [11:39<3:02:44, 7.46s/it]
96
  6%|▌ | 94/1563 [11:47<3:02:32, 7.46s/it]
97
  6%|▌ | 95/1563 [11:54<3:02:21, 7.45s/it]
98
  6%|▌ | 96/1563 [12:02<3:02:10, 7.45s/it]
99
  6%|▌ | 97/1563 [12:09<3:02:27, 7.47s/it]
100
  6%|▋ | 98/1563 [12:17<3:02:13, 7.46s/it]
101
  6%|▋ | 99/1563 [12:24<3:02:29, 7.48s/it]
102
  6%|▋ | 100/1563 [12:32<3:02:14, 7.47s/it]
103
  6%|▋ | 101/1563 [12:39<3:01:43, 7.46s/it]
104
  7%|▋ | 102/1563 [12:47<3:01:33, 7.46s/it]
105
  7%|▋ | 103/1563 [12:54<3:01:13, 7.45s/it]
106
  7%|▋ | 104/1563 [13:01<3:00:57, 7.44s/it]
107
  7%|▋ | 105/1563 [13:09<3:01:25, 7.47s/it]
108
  7%|▋ | 106/1563 [13:16<3:01:16, 7.46s/it]
109
  7%|▋ | 107/1563 [13:24<3:01:08, 7.46s/it]
110
  7%|▋ | 108/1563 [13:31<3:00:53, 7.46s/it]
111
  7%|▋ | 109/1563 [13:39<3:00:38, 7.45s/it]
112
  7%|▋ | 110/1563 [13:46<3:00:47, 7.47s/it]
113
  7%|▋ | 111/1563 [13:54<3:00:31, 7.46s/it]
114
  7%|▋ | 112/1563 [14:01<3:00:41, 7.47s/it]
115
  7%|▋ | 113/1563 [14:09<3:00:34, 7.47s/it]
116
  7%|▋ | 114/1563 [14:16<3:00:19, 7.47s/it]
117
  7%|▋ | 115/1563 [14:24<3:00:35, 7.48s/it]
118
  7%|▋ | 116/1563 [14:31<3:00:16, 7.47s/it]
119
  7%|▋ | 117/1563 [14:38<3:00:02, 7.47s/it]
120
  8%|▊ | 118/1563 [14:46<2:59:46, 7.46s/it]
121
  8%|▊ | 119/1563 [14:53<2:59:42, 7.47s/it]
122
  8%|▊ | 120/1563 [15:01<2:59:24, 7.46s/it]
123
  8%|▊ | 121/1563 [15:08<2:59:17, 7.46s/it]
124
  8%|▊ | 122/1563 [15:16<2:59:04, 7.46s/it]
125
  8%|▊ | 123/1563 [15:23<2:59:03, 7.46s/it]
126
  8%|▊ | 124/1563 [15:31<2:59:29, 7.48s/it]
127
  8%|▊ | 125/1563 [15:38<2:59:18, 7.48s/it]
128
  8%|▊ | 126/1563 [15:46<2:58:59, 7.47s/it]
129
  8%|▊ | 127/1563 [15:53<2:58:52, 7.47s/it]
130
  8%|▊ | 128/1563 [16:01<2:59:40, 7.51s/it]distillm-2-master/scripts/math/gen_0.5.sh: line 11: 88855 Killed python distillm-2-master/generate/generate.py --model Qwen/Qwen2.5-0.5B --output_dir data/dpo/Qwen/Qwen2.5-0.5B/ --batch_size 32 --split train
distillm-2-master/outputs/gen_1.5.log ADDED
Binary file (79.5 kB). View file
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.43.2",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.43.2"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1e71ea42d9f1f8cc98d05fa3396f36b93e976ce6261ea1fbb6066f6a3fa20c0
3
+ size 248894656
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0ac5eeecf40db35305c905a601fd0914495ca39f945ad28f604e61a17302fb
3
+ size 6264
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.43.2",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.43.2"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a810c0bda6c27aed9d081fafc370e8a4c59381f7ae9ff5265c2860d97e94e8
3
+ size 248894656
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0ac5eeecf40db35305c905a601fd0914495ca39f945ad28f604e61a17302fb
3
+ size 6264
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.43.2",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.43.2"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0484c1a2833a721079e478a21ce64bc1f0093737b9d18a1b8b74fdc3f05b6fd6
3
+ size 333472608
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f20537b7b0f8f6695ae35b38a65104662891ff13d88d3efccc1b2261f369aece
3
+ size 7864
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.43.2",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.43.2"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b94eef7a6c569f4494e45491f8011097f1b4e69fae6d0e68274791c2e3c61745
3
+ size 333472608
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f20537b7b0f8f6695ae35b38a65104662891ff13d88d3efccc1b2261f369aece
3
+ size 7864
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f20537b7b0f8f6695ae35b38a65104662891ff13d88d3efccc1b2261f369aece
3
+ size 7864
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: facebook/opt-1.3b
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.9.0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "facebook/opt-1.3b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 128,
13
+ "lora_dropout": 0.05,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 16,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "down_proj",
23
+ "k_proj",
24
+ "gate_proj",
25
+ "up_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "q_proj"
29
+ ],
30
+ "task_type": "CAUSAL_LM",
31
+ "use_dora": true,
32
+ "use_rslora": false
33
+ }
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c4636f3e9cc3f23bc830ba3d22ce72523a0a2add191d787a53094b50d00dec
3
+ size 9763032
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "</s>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "1": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "2": {
14
+ "content": "</s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ }
21
+ },
22
+ "bos_token": "</s>",
23
+ "clean_up_tokenization_spaces": true,
24
+ "eos_token": "</s>",
25
+ "errors": "replace",
26
+ "model_max_length": 2048,
27
+ "pad_token": "<pad>",
28
+ "tokenizer_class": "GPT2Tokenizer",
29
+ "unk_token": "</s>"
30
+ }