jeffasante commited on
Commit
801b2cf
·
verified ·
1 Parent(s): 3f4978c

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -66,3 +66,4 @@ qwen3.5-0.8b-v1/tokenizer.json filter=lfs diff=lfs merge=lfs -text
66
  smollm2-360m-q1-v1/smollm2-360m-int8-v1.cellm filter=lfs diff=lfs merge=lfs -text
67
  smolvlm-256m-instruct-f16-full/smolvlm-256m-instruct-f16-full.cellm filter=lfs diff=lfs merge=lfs -text
68
  smolvlm-256m-instruct-int8-v1/smolvlm-256m-instruct-int8-v1.cellm filter=lfs diff=lfs merge=lfs -text
 
 
66
  smollm2-360m-q1-v1/smollm2-360m-int8-v1.cellm filter=lfs diff=lfs merge=lfs -text
67
  smolvlm-256m-instruct-f16-full/smolvlm-256m-instruct-f16-full.cellm filter=lfs diff=lfs merge=lfs -text
68
  smolvlm-256m-instruct-int8-v1/smolvlm-256m-instruct-int8-v1.cellm filter=lfs diff=lfs merge=lfs -text
69
+ nanowhale-100m-v1/nanowhale-100m-v1.cellm filter=lfs diff=lfs merge=lfs -text
nanowhale-100m-v1/README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NanoWhale-100M (cellm)
2
+
3
+ NanoWhale-100M is a tiny 100M parameter model based on the DeepSeek-V4 architecture (MLA + MoE), converted to the cellm format for efficient on-device inference.
4
+
5
+ ## Model Details
6
+
7
+ - **Architecture**: DeepSeek-V4 (MLA + MoE)
8
+ - **Parameters**: ~100M
9
+ - **Layers**: 8
10
+ - **Hidden Size**: 320
11
+ - **MLA Config**: 8 heads, 96 head_dim, 32 qk_rope_head_dim
12
+ - **MoE Config**: 4 routed experts, 1 shared expert, 2 experts per token
13
+ - **Vocab Size**: 129,280
14
+
15
+ ## Files
16
+
17
+ | File | Format | Size |
18
+ |------|--------|------|
19
+ | nanowhale-100m-v1.cellm | f16/int8 | 210 MB |
20
+
21
+ ## Usage
22
+
23
+ ```sh
24
+ ./target/release/infer \
25
+ --model nanowhale-100m-v1.cellm \
26
+ --tokenizer tokenizer.json \
27
+ --prompt "<|begin▁of▁sentence|><|User|>what's sycophancy?<|Assistant|>" \
28
+ --gen 100 --temperature 0 --backend cpu
29
+ ```
30
+
31
+ ## Notes
32
+
33
+ - This model uses **Multi-head Latent Attention (MLA)** for efficient KV cache management.
34
+ - It uses **DeepSeekMoE** with routed and shared experts.
35
+ - Designed for extremely lightweight inference on mobile and edge devices.
nanowhale-100m-v1/nanowhale-100m-v1.cellm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19bd7cd4d517c3800da139b2b0709eb4ece4742fd75604acba7b3df371ac155b
3
+ size 220774656
nanowhale-100m-v1/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nanowhale-100m-v1/tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin▁of▁sentence|>",
4
+ "eos_token": "<|end▁of▁sentence|>",
5
+ "is_local": true,
6
+ "local_files_only": false,
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "pad_token": "<|end▁of▁sentence|>",
9
+ "tokenizer_class": "TokenizersBackend"
10
+ }