File size: 1,108 Bytes
2734f08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
{
  "schema_version": 1,
  "_comment": "NV-EmbedCode-7B (MistralBiDirectional + avg-pool) on Hexagon v81. The 7B encoder is split into 4 chained W8 context parts (enc_p0..enc_p3) run via the llama_embed_sharded host step. Host-side RoPE (no Gather) using plain RoPE (Mistral theta 10000, factor 1); bidirectional. Encoder S=128 (same as max_ctx), head_dim 128.",
  "model": {"name": "nv-embedcode-7b", "family": "embedding", "dsp_arch": "v81", "tokenizer_pre": "default"},
  "params": {"hidden": 4096, "vocab": 32000, "n_layers": 32, "head_dim": 128, "max_ctx": 128, "eos_token_id": 2},
  "artifacts": {
    "contexts": {
      "enc_p0": {"bin": "nvembedcode7b_enc_p0_w8.bin"},
      "enc_p1": {"bin": "nvembedcode7b_enc_p1_w8.bin"},
      "enc_p2": {"bin": "nvembedcode7b_enc_p2_w8.bin"},
      "enc_p3": {"bin": "nvembedcode7b_enc_p3_w8.bin"}
    },
    "embed": "nvembedcode7b_embed_f16.bin",
    "tokenizer": "tokenizer.json"
  },
  "plan": {"steps": [{"host": "llama_embed_sharded", "params": {"encoder_parts": "nvembedcode7b_enc_p0_w8,nvembedcode7b_enc_p1_w8,nvembedcode7b_enc_p2_w8,nvembedcode7b_enc_p3_w8", "rope_factor": 1.0, "rope_theta": 10000.0, "neg": -50000}}]}
}