// llm_mutil_npu / tests / test_tokenizer.cpp
// xianglarry's picture
// Initial C++ aclnn EAGER inference for Qwen3-235B-A22B MoE on Ascend 910 × 16 NPU
// 4b9fefd
// test_tokenizer.cpp — verify encode + decode match Python reference.
#include "tokenizer.h"
#include <cstdio>
#include <string>
#include <vector>
// Tokenizer smoke test: checks decode of known ids, encode through the Python
// helper, an encode->decode roundtrip, and the chat-template encode path.
//
// Usage: test_tokenizer [model_dir] [vocab_path]
//   model_dir   directory passed to Tokenizer::encode_via_python
//               (default: the placeholder path below — override it on real machines)
//   vocab_path  vocabulary file passed to Tokenizer::load
//               (default: "tokenizer_data/vocab.bin")
//
// Returns 0 when every check passes, 1 otherwise (including a failed vocab load).
int main(int argc, char** argv) {
    // Both paths keep their original defaults so running without arguments
    // behaves exactly as before; arguments only make the test usable elsewhere.
    const std::string model_dir =
        (argc > 1) ? argv[1] : "/path/to/Qwen3-235B-A22B-Instruct-2507-BF16";
    const std::string vocab_path = (argc > 2) ? argv[2] : "tokenizer_data/vocab.bin";

    Tokenizer tok;
    if (!tok.load(vocab_path)) {
        // Report the failure instead of exiting silently with status 1.
        std::fprintf(stderr, "failed to load vocab from '%s'\n", vocab_path.c_str());
        return 1;
    }
    printf("Vocab size: %zu\n", tok.size());

    // Reference ids for the prompt (per the original comment: from export_vocab.py).
    const std::vector<int> expected_ids = {785, 6722, 315, 9625, 374};
    const std::string prompt = "The capital of France is";

    // Test 1: decode known ids
    std::vector<int> ids = expected_ids;
    const std::string decoded = tok.decode(ids);
    printf("decode({785,6722,315,9625,374}) = '%s'\n", decoded.c_str());
    const bool ok1 = (decoded == prompt);
    printf(" %s\n", ok1 ? "OK" : "FAIL");

    // Test 2: encode via Python subprocess (third argument false: plain prompt,
    // as opposed to the chat-template call in Test 4)
    printf("\nTesting encode_via_python...\n");
    auto enc = tok.encode_via_python(model_dir, prompt, false);
    printf("encode('%s') -> [", prompt.c_str());
    for (size_t i = 0; i < enc.size(); i++) printf("%s%d", i ? "," : "", enc[i]);
    printf("]\n");
    const bool ok2 = (enc == expected_ids);
    printf(" %s (expected [785,6722,315,9625,374])\n", ok2 ? "OK" : "FAIL");

    // Test 3: encode then decode roundtrip
    const std::string rt = tok.decode(enc);
    printf("\nencode+decode roundtrip: '%s'\n", rt.c_str());
    const bool ok3 = (rt == prompt);
    printf(" %s\n", ok3 ? "OK" : "FAIL");

    // Test 4: chat template
    printf("\nTesting chat template...\n");
    auto chat_ids = tok.encode_via_python(model_dir, "What is the capital of France?", true);
    printf("chat encode size: %zu\n", chat_ids.size());
    const std::string chat_decoded = tok.decode(chat_ids);
    printf("chat decode: %s\n", chat_decoded.c_str());
    // Loose check only: the templated prompt must be longer than 10 tokens
    // and still contain the user's text after decoding.
    const bool ok4 =
        (chat_ids.size() > 10 && chat_decoded.find("capital") != std::string::npos);
    printf(" %s\n", ok4 ? "OK" : "FAIL");

    const bool all = ok1 && ok2 && ok3 && ok4;
    printf("\n%s\n", all ? "=== test_tokenizer PASS ===" : "=== test_tokenizer FAIL ===");
    return all ? 0 : 1;
}