| |
| |
| |
| |
| |
| |
#include "runner.h"

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
|
|
/// Widens a bfloat16 bit pattern to a 32-bit float.
///
/// The 16 input bits are placed in the upper half of a 32-bit word (the lower
/// 16 bits are zero) and that word is reinterpreted as a float.
///
/// @param x Raw bfloat16 bits.
/// @return The float whose upper 16 bits equal `x`.
static float bf16_to_float(uint16_t x) {
    static_assert(sizeof(float) == sizeof(uint32_t),
                  "bf16_to_float requires a 32-bit float");
    const uint32_t bits = static_cast<uint32_t>(x) << 16;
    float value;
    // memcpy is the well-defined way to reinterpret the bits (no aliasing UB).
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}
/// Reads the entire file at `p` into memory as raw bytes.
///
/// @param p Path of the file to read (opened in binary mode).
/// @return The file contents; empty if the file exists but has zero length.
/// @throws std::runtime_error if the file cannot be opened, its size cannot be
///         determined, or fewer bytes than expected could be read.
static std::vector<uint8_t> read_file(const std::string& p) {
    // Open positioned at the end so tellg() yields the file size.
    std::ifstream f(p, std::ios::binary | std::ios::ate);
    if (!f) throw std::runtime_error("read_file: cannot open '" + p + "'");

    const std::streamoff end = f.tellg();
    if (end < 0) throw std::runtime_error("read_file: cannot determine size of '" + p + "'");

    std::vector<uint8_t> v(static_cast<std::size_t>(end));
    if (v.empty()) return v;

    f.seekg(0, std::ios::beg);
    f.read(reinterpret_cast<char*>(v.data()), static_cast<std::streamsize>(v.size()));
    if (!f || f.gcount() != static_cast<std::streamsize>(v.size())) {
        throw std::runtime_error("read_file: short read on '" + p + "'");
    }
    return v;
}
|
|
| int main() { |
| const std::string model_dir = "/path/to/Qwen3-235B-A22B-Instruct-2507-BF16"; |
| const std::string attn_data = "tests/attn_data"; |
|
|
| Runner runner; |
| int N = 2; |
| if (const char* env = std::getenv("N_LAYERS")) N = std::atoi(env); |
| if (!runner.init(model_dir, 1, 0, |
| N, 128)) return 1; |
|
|
| |
| auto tok_raw = read_file(attn_data + "/token_ids.bin"); |
| int32_t S = *(int32_t*)tok_raw.data(); |
| std::vector<int32_t> tokens(S); |
| std::memcpy(tokens.data(), tok_raw.data() + 4, S * 4); |
| printf("prefill %d tokens...\n", S); |
|
|
| DeviceBuffer logits; |
| if (!runner.prefill(tokens.data(), S, logits)) return 1; |
| printf("prefill done. past_len=%ld, logits size=%zu bytes\n", runner.past_len(), logits.size); |
|
|
| |
| const int64_t V = runner.cfg().vocab_size; |
| std::vector<uint16_t> logits_host(V); |
| ACL_CHECK(aclrtMemcpy(logits_host.data(), V*2, logits.get(), V*2, ACL_MEMCPY_DEVICE_TO_HOST)); |
|
|
| |
| int best = 0; |
| float best_v = bf16_to_float(logits_host[0]); |
| for (int i = 1; i < V; i++) { |
| float v = bf16_to_float(logits_host[i]); |
| if (v > best_v) { best_v = v; best = i; } |
| } |
| printf("prefill argmax token_id = %d (logit=%.3f)\n", best, best_v); |
|
|
| |
| DeviceBuffer logits2; |
| if (!runner.decode(best, logits2)) return 1; |
| printf("decode done. past_len=%ld\n", runner.past_len()); |
|
|
| std::vector<uint16_t> logits2_host(V); |
| ACL_CHECK(aclrtMemcpy(logits2_host.data(), V*2, logits2.get(), V*2, ACL_MEMCPY_DEVICE_TO_HOST)); |
| int best2 = 0; |
| float best2_v = bf16_to_float(logits2_host[0]); |
| for (int i = 1; i < V; i++) { |
| float v = bf16_to_float(logits2_host[i]); |
| if (v > best2_v) { best2_v = v; best2 = i; } |
| } |
| printf("decode argmax token_id = %d (logit=%.3f)\n", best2, best2_v); |
|
|
| printf("\n=== test_runner (N=1 layer) PASS — prefill + decode ran cleanly ===\n"); |
| return 0; |
| } |
|
|