| { |
| "schema_version": 1, |
| "_comment": "NV-EmbedCode-7B (MistralBiDirectional + avg-pool) on Hexagon v81. The 7B encoder is split into 4 chained W8 context parts (enc_p0..enc_p3), run by the llama_embed_sharded host step. RoPE is applied on the host (no Gather) as plain RoPE (Mistral theta 10000, factor 1); the model is bidirectional. Encoder sequence length S=128, head_dim=128.", |
| "model": {"name": "nv-embedcode-7b", "family": "embedding", "dsp_arch": "v81", "tokenizer_pre": "default"}, |
| "params": {"hidden": 4096, "vocab": 32000, "n_layers": 32, "head_dim": 128, "max_ctx": 128, "eos_token_id": 2}, |
| "artifacts": { |
| "contexts": { |
| "enc_p0": {"bin": "nvembedcode7b_enc_p0_w8.bin"}, |
| "enc_p1": {"bin": "nvembedcode7b_enc_p1_w8.bin"}, |
| "enc_p2": {"bin": "nvembedcode7b_enc_p2_w8.bin"}, |
| "enc_p3": {"bin": "nvembedcode7b_enc_p3_w8.bin"} |
| }, |
| "embed": "nvembedcode7b_embed_f16.bin", |
| "tokenizer": "tokenizer.json" |
| }, |
| "plan": {"steps": [{"host": "llama_embed_sharded", "params": {"encoder_parts": "nvembedcode7b_enc_p0_w8,nvembedcode7b_enc_p1_w8,nvembedcode7b_enc_p2_w8,nvembedcode7b_enc_p3_w8", "rope_factor": 1.0, "rope_theta": 10000.0, "neg": -50000}}]} |
| } |
|
|