Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

README.md +5 -5
comparison_graph.png +0 -0
config.json +6 -3
generation_config.json +2 -1
model-00001-of-00002.safetensors +2 -2
model-00002-of-00002.safetensors +2 -2
model.safetensors.index.json +8 -8
tokenizer.json +2 -2
tokenizer_config.json +7 -185

README.md CHANGED Viewed

@@ -27,18 +27,18 @@ This model is an **aggressively pruned** version of [Qwen/Qwen2.5-3B-Instruct](ht
 | Category | Original | Pruned | Change |
 |----------|----------|--------|--------|
-| **Python** | 100.0% | 60.0% ⭐ | ↓ 40.0% |
 | Html | 6.7% | 0.0% | ↓ 6.7% |
 | Trivia | 66.7% | 60.0% | ↓ 6.7% |
 | Math | 60.0% | 66.7% | ↑ 6.7% |
-| Reasoning | 100.0% | 93.3% | ↓ 6.7% |
-| Medical | 86.7% | 73.3% | ↓ 13.3% |
 | Linux | 100.0% | 100.0% | → |
 | Writing | 73.3% | 73.3% | → |
-**Average**: 74.2% → 65.8% (-8.3%)
-**Python Retention**: 60.0% of original performance
 ![Comparison Graph](comparison_graph.png)

 | Category | Original | Pruned | Change |
 |----------|----------|--------|--------|
+| **Python** | 100.0% | 40.0% ⭐ | ↓ 60.0% |
 | Html | 6.7% | 0.0% | ↓ 6.7% |
 | Trivia | 66.7% | 60.0% | ↓ 6.7% |
 | Math | 60.0% | 66.7% | ↑ 6.7% |
+| Reasoning | 100.0% | 86.7% | ↓ 13.3% |
+| Medical | 86.7% | 80.0% | ↓ 6.7% |
 | Linux | 100.0% | 100.0% | → |
 | Writing | 73.3% | 73.3% | → |
+**Average**: 74.2% → 63.3% (-10.8%)
+**Python Retention**: 40.0% of original performance
 ![Comparison Graph](comparison_graph.png)

comparison_graph.png CHANGED Viewed

config.json CHANGED Viewed

@@ -54,12 +54,15 @@
   "num_attention_heads": 16,
   "num_hidden_layers": 36,
   "num_key_value_heads": 2,
   "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
-  "rope_theta": 1000000.0,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "transformers_version": "4.57.6",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

   "num_attention_heads": 16,
   "num_hidden_layers": 36,
   "num_key_value_heads": 2,
+  "pad_token_id": null,
   "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000.0,
+    "rope_type": "default"
+  },
   "sliding_window": null,
   "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

generation_config.json CHANGED Viewed

@@ -1,10 +1,11 @@
 {
   "bos_token_id": 151643,
   "eos_token_id": [
     151645,
     151643
   ],
   "pad_token_id": 151643,
   "repetition_penalty": 1.05,
-  "transformers_version": "4.57.6"
 }

 {
   "bos_token_id": 151643,
+  "do_sample": false,
   "eos_token_id": [
     151645,
     151643
   ],
   "pad_token_id": 151643,
   "repetition_penalty": 1.05,
+  "transformers_version": "5.0.0"
 }

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa227934eee835951d3bef5d549af35efe0623a86a611a7b7ef4705a9fd7ae05
-size 3968650256

 version https://git-lfs.github.com/spec/v1
+oid sha256:c32815103a55a91a5a9aaab5031ab4617fe45d38948ee2ba0e0e64a5ffe6336d
+size 3995916600

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65c5a84b866ea50d060760a42b80c65c80cba711982caa0a8b71361ba6bd26c4
-size 2203276288

 version https://git-lfs.github.com/spec/v1
+oid sha256:726f87d51e70d642a1ef788b0659139104b53042a29d4968ae703ed35499b284
+size 2176009944

model.safetensors.index.json CHANGED Viewed

@@ -173,18 +173,18 @@
     "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",

     "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:524cc986cd91c8377344667a68c65289476ab2fc67f83f57b36362811eb4dd2f
-size 11422163

 version https://git-lfs.github.com/spec/v1
+oid sha256:51354673edf4300eb841665e1fb684cc1badea87c49d5de6ef09981151683508
+size 11422159

tokenizer_config.json CHANGED Viewed

@@ -1,185 +1,11 @@
 {
-  "add_bos_token": false,
   "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151644": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151645": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151646": {
-      "content": "<|object_ref_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151647": {
-      "content": "<|object_ref_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151648": {
-      "content": "<|box_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151649": {
-      "content": "<|box_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151650": {
-      "content": "<|quad_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151651": {
-      "content": "<|quad_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151652": {
-      "content": "<|vision_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151653": {
-      "content": "<|vision_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151654": {
-      "content": "<|vision_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151655": {
-      "content": "<|image_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151656": {
-      "content": "<|video_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151657": {
-      "content": "<tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151658": {
-      "content": "</tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151659": {
-      "content": "<|fim_prefix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151660": {
-      "content": "<|fim_middle|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151661": {
-      "content": "<|fim_suffix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151662": {
-      "content": "<|fim_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151663": {
-      "content": "<|repo_name|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151664": {
-      "content": "<|file_sep|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    }
-  },
-  "additional_special_tokens": [
     "<|im_start|>",
     "<|im_end|>",
     "<|object_ref_start|>",
@@ -194,11 +20,7 @@
     "<|image_pad|>",
     "<|video_pad|>"
   ],
-  "bos_token": null,
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "errors": "replace",
-  "extra_special_tokens": {},
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,

 {
   "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": [
     "<|im_start|>",
     "<|im_end|>",
     "<|object_ref_start|>",
     "<|image_pad|>",
     "<|video_pad|>"
   ],
+  "is_local": false,
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,