VoCuc committed on Apr 20

Commit

ca469fd

verified ·

1 Parent(s): 1b6acf2

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

distillm-2-master/outputs/gen_0.5.log +1 -0
distillm-2-master/outputs/gen_1.5.log +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/config.json +39 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/generation_config.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/merges.txt +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/model.safetensors +3 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/special_tokens_map.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer_config.json +20 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/trainer_state.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/training_args.bin +3 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/vocab.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/config.json +39 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/generation_config.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/merges.txt +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/model.safetensors +3 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/special_tokens_map.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer_config.json +20 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/trainer_state.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/training_args.bin +3 -0
distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/vocab.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/config.json +39 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/generation_config.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/merges.txt +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/model.safetensors +3 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/special_tokens_map.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer_config.json +20 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/trainer_state.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/training_args.bin +3 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/vocab.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/config.json +39 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/generation_config.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/merges.txt +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/model.safetensors +3 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/special_tokens_map.json +6 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer_config.json +20 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/trainer_state.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/training_args.bin +3 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/vocab.json +0 -0
distillm-2-master/outputs/gpt2-0.1b-span-distillm2/training_args.bin +3 -0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/README.md +202 -0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_config.json +33 -0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_model.safetensors +3 -0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/merges.txt +0 -0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/special_tokens_map.json +30 -0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer.json +0 -0
distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer_config.json +30 -0

distillm-2-master/outputs/gen_0.5.log ADDED Viewed

@@ -0,0 +1 @@
  0%|          | 0/1563 [00:00<?, ?it/s]
  0%|          | 1/1563 [00:08<3:47:56,  8.76s/it]
  0%|          | 2/1563 [00:16<3:29:49,  8.06s/it]
  0%|          | 3/1563 [00:24<3:25:04,  7.89s/it]
  0%|          | 4/1563 [00:31<3:21:31,  7.76s/it]
  0%|          | 5/1563 [00:39<3:19:22,  7.68s/it]
  0%|          | 6/1563 [00:46<3:18:29,  7.65s/it]
  0%|          | 7/1563 [00:54<3:17:49,  7.63s/it]
  1%|          | 8/1563 [01:01<3:18:04,  7.64s/it]
  1%|          | 9/1563 [01:09<3:17:31,  7.63s/it]
  1%|          | 10/1563 [01:17<3:17:01,  7.61s/it]
  1%|          | 11/1563 [01:24<3:18:14,  7.66s/it]
  1%|          | 12/1563 [01:32<3:17:33,  7.64s/it]
  1%|          | 13/1563 [01:40<3:17:22,  7.64s/it]
  1%|          | 14/1563 [01:47<3:17:08,  7.64s/it]
  1%|          | 15/1563 [01:55<3:17:07,  7.64s/it]
  1%|          | 16/1563 [02:02<3:15:56,  7.60s/it]
  1%|          | 17/1563 [02:10<3:15:18,  7.58s/it]
  1%|          | 18/1563 [02:17<3:14:37,  7.56s/it]
  1%|          | 19/1563 [02:25<3:14:16,  7.55s/it]
  1%|▏         | 20/1563 [02:32<3:13:37,  7.53s/it]
  1%|▏         | 21/1563 [02:40<3:13:04,  7.51s/it]
  1%|▏         | 22/1563 [02:47<3:12:41,  7.50s/it]
  1%|▏         | 23/1563 [02:55<3:12:21,  7.49s/it]
  2%|▏         | 24/1563 [03:02<3:12:08,  7.49s/it]
  2%|▏         | 25/1563 [03:10<3:12:03,  7.49s/it]
  2%|▏         | 26/1563 [03:17<3:12:06,  7.50s/it]
  2%|▏         | 27/1563 [03:25<3:11:44,  7.49s/it]
  2%|▏         | 28/1563 [03:32<3:11:49,  7.50s/it]
  2%|▏         | 29/1563 [03:40<3:11:29,  7.49s/it]
  2%|▏         | 30/1563 [03:47<3:12:05,  7.52s/it]
  2%|▏         | 31/1563 [03:55<3:11:48,  7.51s/it]
  2%|▏         | 32/1563 [04:02<3:11:47,  7.52s/it]
  2%|▏         | 33/1563 [04:10<3:12:07,  7.53s/it]
  2%|▏         | 34/1563 [04:18<3:11:56,  7.53s/it]
  2%|▏         | 35/1563 [04:25<3:13:25,  7.59s/it]
  2%|▏         | 36/1563 [04:33<3:12:44,  7.57s/it]
  2%|▏         | 37/1563 [04:40<3:11:43,  7.54s/it]
  2%|▏         | 38/1563 [04:48<3:12:57,  7.59s/it]
  2%|▏         | 39/1563 [04:56<3:12:35,  7.58s/it]
  3%|▎         | 40/1563 [05:03<3:12:00,  7.56s/it]
  3%|▎         | 41/1563 [05:11<3:10:58,  7.53s/it]
  3%|▎         | 42/1563 [05:18<3:10:18,  7.51s/it]
  3%|▎         | 43/1563 [05:25<3:10:03,  7.50s/it]
  3%|▎         | 44/1563 [05:33<3:10:16,  7.52s/it]
  3%|▎         | 45/1563 [05:41<3:10:51,  7.54s/it]
  3%|▎         | 46/1563 [05:48<3:10:04,  7.52s/it]
  3%|▎         | 47/1563 [05:56<3:09:33,  7.50s/it]
  3%|▎         | 48/1563 [06:03<3:09:09,  7.49s/it]
  3%|▎         | 49/1563 [06:11<3:09:18,  7.50s/it]
  3%|▎         | 50/1563 [06:18<3:08:52,  7.49s/it]
  3%|▎         | 51/1563 [06:26<3:09:22,  7.51s/it]
  3%|▎         | 52/1563 [06:33<3:08:50,  7.50s/it]
  3%|▎         | 53/1563 [06:41<3:10:13,  7.56s/it]
  3%|▎         | 54/1563 [06:48<3:09:17,  7.53s/it]
  4%|▎         | 55/1563 [06:56<3:09:26,  7.54s/it]
  4%|▎         | 56/1563 [07:03<3:08:40,  7.51s/it]
  4%|▎         | 57/1563 [07:11<3:08:00,  7.49s/it]
  4%|▎         | 58/1563 [07:18<3:07:22,  7.47s/it]
  4%|▍         | 59/1563 [07:26<3:07:27,  7.48s/it]
  4%|▍         | 60/1563 [07:33<3:07:09,  7.47s/it]
  4%|▍         | 61/1563 [07:40<3:06:46,  7.46s/it]
  4%|▍         | 62/1563 [07:48<3:06:32,  7.46s/it]
  4%|▍         | 63/1563 [07:55<3:06:54,  7.48s/it]
  4%|▍         | 64/1563 [08:03<3:06:41,  7.47s/it]
  4%|▍         | 65/1563 [08:10<3:06:17,  7.46s/it]
  4%|▍         | 66/1563 [08:18<3:05:56,  7.45s/it]
  4%|▍         | 67/1563 [08:25<3:05:46,  7.45s/it]
  4%|▍         | 68/1563 [08:33<3:05:45,  7.46s/it]
  4%|▍         | 69/1563 [08:40<3:05:31,  7.45s/it]
  4%|▍         | 70/1563 [08:48<3:05:25,  7.45s/it]
  5%|▍         | 71/1563 [08:55<3:05:24,  7.46s/it]
  5%|▍         | 72/1563 [09:03<3:05:26,  7.46s/it]
  5%|▍         | 73/1563 [09:10<3:05:29,  7.47s/it]
  5%|▍         | 74/1563 [09:17<3:05:30,  7.48s/it]
  5%|▍         | 75/1563 [09:25<3:05:22,  7.47s/it]
  5%|▍         | 76/1563 [09:32<3:05:08,  7.47s/it]
  5%|▍         | 77/1563 [09:40<3:05:12,  7.48s/it]
  5%|▍         | 78/1563 [09:47<3:05:02,  7.48s/it]
  5%|▌         | 79/1563 [09:55<3:04:53,  7.48s/it]
  5%|▌         | 80/1563 [10:02<3:04:35,  7.47s/it]
  5%|▌         | 81/1563 [10:10<3:04:30,  7.47s/it]
  5%|▌         | 82/1563 [10:17<3:04:14,  7.46s/it]
  5%|▌         | 83/1563 [10:25<3:04:00,  7.46s/it]
  5%|▌         | 84/1563 [10:32<3:04:24,  7.48s/it]
  5%|▌         | 85/1563 [10:40<3:04:03,  7.47s/it]
  6%|▌         | 86/1563 [10:47<3:03:51,  7.47s/it]
  6%|▌         | 87/1563 [10:55<3:03:53,  7.48s/it]
  6%|▌         | 88/1563 [11:02<3:03:29,  7.46s/it]
  6%|▌         | 89/1563 [11:10<3:03:15,  7.46s/it]
  6%|▌         | 90/1563 [11:17<3:03:14,  7.46s/it]
  6%|▌         | 91/1563 [11:24<3:02:53,  7.45s/it]
  6%|▌         | 92/1563 [11:32<3:02:55,  7.46s/it]
  6%|▌         | 93/1563 [11:39<3:02:44,  7.46s/it]
  6%|▌         | 94/1563 [11:47<3:02:32,  7.46s/it]
  6%|▌         | 95/1563 [11:54<3:02:21,  7.45s/it]
  6%|▌         | 96/1563 [12:02<3:02:10,  7.45s/it]
  6%|▌         | 97/1563 [12:09<3:02:27,  7.47s/it]
  6%|▋         | 98/1563 [12:17<3:02:13,  7.46s/it]
  6%|▋         | 99/1563 [12:24<3:02:29,  7.48s/it]
  6%|▋         | 100/1563 [12:32<3:02:14,  7.47s/it]
  6%|▋         | 101/1563 [12:39<3:01:43,  7.46s/it]
  7%|▋         | 102/1563 [12:47<3:01:33,  7.46s/it]
  7%|▋         | 103/1563 [12:54<3:01:13,  7.45s/it]
  7%|▋         | 104/1563 [13:01<3:00:57,  7.44s/it]
  7%|▋         | 105/1563 [13:09<3:01:25,  7.47s/it]
  7%|▋         | 106/1563 [13:16<3:01:16,  7.46s/it]
  7%|▋         | 107/1563 [13:24<3:01:08,  7.46s/it]
  7%|▋         | 108/1563 [13:31<3:00:53,  7.46s/it]
  7%|▋         | 109/1563 [13:39<3:00:38,  7.45s/it]
  7%|▋         | 110/1563 [13:46<3:00:47,  7.47s/it]
  7%|▋         | 111/1563 [13:54<3:00:31,  7.46s/it]
  7%|▋         | 112/1563 [14:01<3:00:41,  7.47s/it]
  7%|▋         | 113/1563 [14:09<3:00:34,  7.47s/it]
  7%|▋         | 114/1563 [14:16<3:00:19,  7.47s/it]
  7%|▋         | 115/1563 [14:24<3:00:35,  7.48s/it]
  7%|▋         | 116/1563 [14:31<3:00:16,  7.47s/it]
  7%|▋         | 117/1563 [14:38<3:00:02,  7.47s/it]
  8%|▊         | 118/1563 [14:46<2:59:46,  7.46s/it]
  8%|▊         | 119/1563 [14:53<2:59:42,  7.47s/it]
  8%|▊         | 120/1563 [15:01<2:59:24,  7.46s/it]
  8%|▊         | 121/1563 [15:08<2:59:17,  7.46s/it]
  8%|▊         | 122/1563 [15:16<2:59:04,  7.46s/it]
  8%|▊         | 123/1563 [15:23<2:59:03,  7.46s/it]
  8%|▊         | 124/1563 [15:31<2:59:29,  7.48s/it]
  8%|▊         | 125/1563 [15:38<2:59:18,  7.48s/it]
  8%|▊         | 126/1563 [15:46<2:58:59,  7.47s/it]
  8%|▊         | 127/1563 [15:53<2:58:52,  7.47s/it]
  8%|▊         | 128/1563 [16:01<2:59:40,  7.51s/it]distillm-2-master/scripts/math/gen_0.5.sh: line 11: 88855 Killed                  python distillm-2-master/generate/generate.py --model Qwen/Qwen2.5-0.5B --output_dir data/dpo/Qwen/Qwen2.5-0.5B/ --batch_size 32 --split train

  0%|          | 0/1563 [00:00<?, ?it/s]
  0%|          | 1/1563 [00:08<3:47:56,  8.76s/it]
  0%|          | 2/1563 [00:16<3:29:49,  8.06s/it]
  0%|          | 3/1563 [00:24<3:25:04,  7.89s/it]
  0%|          | 4/1563 [00:31<3:21:31,  7.76s/it]
  0%|          | 5/1563 [00:39<3:19:22,  7.68s/it]
  0%|          | 6/1563 [00:46<3:18:29,  7.65s/it]
  0%|          | 7/1563 [00:54<3:17:49,  7.63s/it]
  1%|          | 8/1563 [01:01<3:18:04,  7.64s/it]
  1%|          | 9/1563 [01:09<3:17:31,  7.63s/it]
  1%|          | 10/1563 [01:17<3:17:01,  7.61s/it]
  1%|          | 11/1563 [01:24<3:18:14,  7.66s/it]
  1%|          | 12/1563 [01:32<3:17:33,  7.64s/it]
  1%|          | 13/1563 [01:40<3:17:22,  7.64s/it]
  1%|          | 14/1563 [01:47<3:17:08,  7.64s/it]
  1%|          | 15/1563 [01:55<3:17:07,  7.64s/it]
  1%|          | 16/1563 [02:02<3:15:56,  7.60s/it]
  1%|          | 17/1563 [02:10<3:15:18,  7.58s/it]
  1%|          | 18/1563 [02:17<3:14:37,  7.56s/it]
  1%|          | 19/1563 [02:25<3:14:16,  7.55s/it]
  1%|▏         | 20/1563 [02:32<3:13:37,  7.53s/it]
  1%|▏         | 21/1563 [02:40<3:13:04,  7.51s/it]
  1%|▏         | 22/1563 [02:47<3:12:41,  7.50s/it]
  1%|▏         | 23/1563 [02:55<3:12:21,  7.49s/it]
  2%|▏         | 24/1563 [03:02<3:12:08,  7.49s/it]
  2%|▏         | 25/1563 [03:10<3:12:03,  7.49s/it]
  2%|▏         | 26/1563 [03:17<3:12:06,  7.50s/it]
  2%|▏         | 27/1563 [03:25<3:11:44,  7.49s/it]
  2%|▏         | 28/1563 [03:32<3:11:49,  7.50s/it]
  2%|▏         | 29/1563 [03:40<3:11:29,  7.49s/it]
  2%|▏         | 30/1563 [03:47<3:12:05,  7.52s/it]
  2%|▏         | 31/1563 [03:55<3:11:48,  7.51s/it]
  2%|▏         | 32/1563 [04:02<3:11:47,  7.52s/it]
  2%|▏         | 33/1563 [04:10<3:12:07,  7.53s/it]
  2%|▏         | 34/1563 [04:18<3:11:56,  7.53s/it]
  2%|▏         | 35/1563 [04:25<3:13:25,  7.59s/it]
  2%|▏         | 36/1563 [04:33<3:12:44,  7.57s/it]
  2%|▏         | 37/1563 [04:40<3:11:43,  7.54s/it]
  2%|▏         | 38/1563 [04:48<3:12:57,  7.59s/it]
  2%|▏         | 39/1563 [04:56<3:12:35,  7.58s/it]
  3%|▎         | 40/1563 [05:03<3:12:00,  7.56s/it]
  3%|▎         | 41/1563 [05:11<3:10:58,  7.53s/it]
  3%|▎         | 42/1563 [05:18<3:10:18,  7.51s/it]
  3%|▎         | 43/1563 [05:25<3:10:03,  7.50s/it]
  3%|▎         | 44/1563 [05:33<3:10:16,  7.52s/it]
  3%|▎         | 45/1563 [05:41<3:10:51,  7.54s/it]
  3%|▎         | 46/1563 [05:48<3:10:04,  7.52s/it]
  3%|▎         | 47/1563 [05:56<3:09:33,  7.50s/it]
  3%|▎         | 48/1563 [06:03<3:09:09,  7.49s/it]
  3%|▎         | 49/1563 [06:11<3:09:18,  7.50s/it]
  3%|▎         | 50/1563 [06:18<3:08:52,  7.49s/it]
  3%|▎         | 51/1563 [06:26<3:09:22,  7.51s/it]
  3%|▎         | 52/1563 [06:33<3:08:50,  7.50s/it]
  3%|▎         | 53/1563 [06:41<3:10:13,  7.56s/it]
  3%|▎         | 54/1563 [06:48<3:09:17,  7.53s/it]
  4%|▎         | 55/1563 [06:56<3:09:26,  7.54s/it]
  4%|▎         | 56/1563 [07:03<3:08:40,  7.51s/it]
  4%|▎         | 57/1563 [07:11<3:08:00,  7.49s/it]
  4%|▎         | 58/1563 [07:18<3:07:22,  7.47s/it]
  4%|▍         | 59/1563 [07:26<3:07:27,  7.48s/it]
  4%|▍         | 60/1563 [07:33<3:07:09,  7.47s/it]
  4%|▍         | 61/1563 [07:40<3:06:46,  7.46s/it]
  4%|▍         | 62/1563 [07:48<3:06:32,  7.46s/it]
  4%|▍         | 63/1563 [07:55<3:06:54,  7.48s/it]
  4%|▍         | 64/1563 [08:03<3:06:41,  7.47s/it]
  4%|▍         | 65/1563 [08:10<3:06:17,  7.46s/it]
  4%|▍         | 66/1563 [08:18<3:05:56,  7.45s/it]
  4%|▍         | 67/1563 [08:25<3:05:46,  7.45s/it]
  4%|▍         | 68/1563 [08:33<3:05:45,  7.46s/it]
  4%|▍         | 69/1563 [08:40<3:05:31,  7.45s/it]
  4%|▍         | 70/1563 [08:48<3:05:25,  7.45s/it]
  5%|▍         | 71/1563 [08:55<3:05:24,  7.46s/it]
  5%|▍         | 72/1563 [09:03<3:05:26,  7.46s/it]
  5%|▍         | 73/1563 [09:10<3:05:29,  7.47s/it]
  5%|▍         | 74/1563 [09:17<3:05:30,  7.48s/it]
  5%|▍         | 75/1563 [09:25<3:05:22,  7.47s/it]
  5%|▍         | 76/1563 [09:32<3:05:08,  7.47s/it]
  5%|▍         | 77/1563 [09:40<3:05:12,  7.48s/it]
  5%|▍         | 78/1563 [09:47<3:05:02,  7.48s/it]
  5%|▌         | 79/1563 [09:55<3:04:53,  7.48s/it]
  5%|▌         | 80/1563 [10:02<3:04:35,  7.47s/it]
  5%|▌         | 81/1563 [10:10<3:04:30,  7.47s/it]
  5%|▌         | 82/1563 [10:17<3:04:14,  7.46s/it]
  5%|▌         | 83/1563 [10:25<3:04:00,  7.46s/it]
  5%|▌         | 84/1563 [10:32<3:04:24,  7.48s/it]
  5%|▌         | 85/1563 [10:40<3:04:03,  7.47s/it]
  6%|▌         | 86/1563 [10:47<3:03:51,  7.47s/it]
  6%|▌         | 87/1563 [10:55<3:03:53,  7.48s/it]
  6%|▌         | 88/1563 [11:02<3:03:29,  7.46s/it]
  6%|▌         | 89/1563 [11:10<3:03:15,  7.46s/it]
  6%|▌         | 90/1563 [11:17<3:03:14,  7.46s/it]
  6%|▌         | 91/1563 [11:24<3:02:53,  7.45s/it]
  6%|▌         | 92/1563 [11:32<3:02:55,  7.46s/it]
  6%|▌         | 93/1563 [11:39<3:02:44,  7.46s/it]
  6%|▌         | 94/1563 [11:47<3:02:32,  7.46s/it]
  6%|▌         | 95/1563 [11:54<3:02:21,  7.45s/it]
  6%|▌         | 96/1563 [12:02<3:02:10,  7.45s/it]
  6%|▌         | 97/1563 [12:09<3:02:27,  7.47s/it]
  6%|▋         | 98/1563 [12:17<3:02:13,  7.46s/it]
  6%|▋         | 99/1563 [12:24<3:02:29,  7.48s/it]
  6%|▋         | 100/1563 [12:32<3:02:14,  7.47s/it]
  6%|▋         | 101/1563 [12:39<3:01:43,  7.46s/it]
  7%|▋         | 102/1563 [12:47<3:01:33,  7.46s/it]
  7%|▋         | 103/1563 [12:54<3:01:13,  7.45s/it]
  7%|▋         | 104/1563 [13:01<3:00:57,  7.44s/it]
  7%|▋         | 105/1563 [13:09<3:01:25,  7.47s/it]
  7%|▋         | 106/1563 [13:16<3:01:16,  7.46s/it]
  7%|▋         | 107/1563 [13:24<3:01:08,  7.46s/it]
  7%|▋         | 108/1563 [13:31<3:00:53,  7.46s/it]
  7%|▋         | 109/1563 [13:39<3:00:38,  7.45s/it]
  7%|▋         | 110/1563 [13:46<3:00:47,  7.47s/it]
  7%|▋         | 111/1563 [13:54<3:00:31,  7.46s/it]
  7%|▋         | 112/1563 [14:01<3:00:41,  7.47s/it]
  7%|▋         | 113/1563 [14:09<3:00:34,  7.47s/it]
  7%|▋         | 114/1563 [14:16<3:00:19,  7.47s/it]
  7%|▋         | 115/1563 [14:24<3:00:35,  7.48s/it]
  7%|▋         | 116/1563 [14:31<3:00:16,  7.47s/it]
  7%|▋         | 117/1563 [14:38<3:00:02,  7.47s/it]
  8%|▊         | 118/1563 [14:46<2:59:46,  7.46s/it]
  8%|▊         | 119/1563 [14:53<2:59:42,  7.47s/it]
  8%|▊         | 120/1563 [15:01<2:59:24,  7.46s/it]
  8%|▊         | 121/1563 [15:08<2:59:17,  7.46s/it]
  8%|▊         | 122/1563 [15:16<2:59:04,  7.46s/it]
  8%|▊         | 123/1563 [15:23<2:59:03,  7.46s/it]
  8%|▊         | 124/1563 [15:31<2:59:29,  7.48s/it]
  8%|▊         | 125/1563 [15:38<2:59:18,  7.48s/it]
  8%|▊         | 126/1563 [15:46<2:58:59,  7.47s/it]
  8%|▊         | 127/1563 [15:53<2:58:52,  7.47s/it]
  8%|▊         | 128/1563 [16:01<2:59:40,  7.51s/it]distillm-2-master/scripts/math/gen_0.5.sh: line 11: 88855 Killed                  python distillm-2-master/generate/generate.py --model Qwen/Qwen2.5-0.5B --output_dir data/dpo/Qwen/Qwen2.5-0.5B/ --batch_size 32 --split train

distillm-2-master/outputs/gen_1.5.log ADDED Viewed

Binary file (79.5 kB). View file

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "openai-community/gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.43.2",
+  "use_cache": true,
+  "vocab_size": 50257
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.43.2"
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1e71ea42d9f1f8cc98d05fa3396f36b93e976ce6261ea1fbb6066f6a3fa20c0
+size 248894656

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a0ac5eeecf40db35305c905a601fd0914495ca39f945ad28f604e61a17302fb
+size 6264

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-5718/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "openai-community/gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.43.2",
+  "use_cache": true,
+  "vocab_size": 50257
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.43.2"
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4a810c0bda6c27aed9d081fafc370e8a4c59381f7ae9ff5265c2860d97e94e8
+size 248894656

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a0ac5eeecf40db35305c905a601fd0914495ca39f945ad28f604e61a17302fb
+size 6264

distillm-2-master/outputs/gpt2-0.1b-distillm2/checkpoint-7145/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "openai-community/gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.43.2",
+  "use_cache": true,
+  "vocab_size": 50257
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.43.2"
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0484c1a2833a721079e478a21ce64bc1f0093737b9d18a1b8b74fdc3f05b6fd6
+size 333472608

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f20537b7b0f8f6695ae35b38a65104662891ff13d88d3efccc1b2261f369aece
+size 7864

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-5718/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "openai-community/gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.43.2",
+  "use_cache": true,
+  "vocab_size": 50257
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.43.2"
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b94eef7a6c569f4494e45491f8011097f1b4e69fae6d0e68274791c2e3c61745
+size 333472608

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f20537b7b0f8f6695ae35b38a65104662891ff13d88d3efccc1b2261f369aece
+size 7864

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/checkpoint-7145/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/gpt2-0.1b-span-distillm2/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f20537b7b0f8f6695ae35b38a65104662891ff13d88d3efccc1b2261f369aece
+size 7864

distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: facebook/opt-1.3b
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.0

distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "facebook/opt-1.3b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "k_proj",
+    "gate_proj",
+    "up_proj",
+    "o_proj",
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": true,
+  "use_rslora": false
+}

distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12c4636f3e9cc3f23bc830ba3d22ce72523a0a2add191d787a53094b50d00dec
+size 9763032

distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distillm-2-master/outputs/opt-1.3b-distillm2/checkpoint-4287/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "add_bos_token": true,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "</s>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "errors": "replace",
+  "model_max_length": 2048,
+  "pad_token": "<pad>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "</s>"
+}