"""Generate a linear (y = x @ W.T) reference for a realistic Qwen3 attention shape.

Running this script draws seeded random bfloat16 inputs, computes the
reference product after moving them to NPU device 0, and writes ``x``, ``W``
and ``y_ref`` as raw ``.bin`` files plus a ``shape.txt`` into
``tests/mm_data`` (relative to the current working directory).
"""
import os
import struct

import torch
# NOTE(review): torch_npu is never referenced by name; presumably it is
# imported for its side effect of making ``torch.npu`` / ``.npu()`` available.
import torch_npu

torch.npu.set_device(0)
torch.manual_seed(7)  # fixed seed for the random inputs generated below

# Problem size: x is (N, D), W is (OUT, D), so y_ref = x @ W.T is (N, OUT).
N, D, OUT = 5, 4096, 8192

# Inputs are sampled on the CPU in bfloat16 and then moved to the NPU.
x = torch.randn(N, D, dtype=torch.bfloat16).npu()
W = torch.randn(OUT, D, dtype=torch.bfloat16).npu()

y_ref = torch.matmul(x, W.t())

out_dir = 'tests/mm_data'
os.makedirs(out_dir, exist_ok=True)


def dump(name: str, t: torch.Tensor) -> None:
    """Write tensor ``t`` to ``<out_dir>/<name>.bin`` as raw element bytes.

    The tensor is made contiguous, copied to the CPU, and its storage is
    reinterpreted as int16 before being handed to NumPy, so the file holds
    the unmodified bit patterns in row-major order and native byte order.
    No header or shape information is written; see ``shape.txt``.

    Args:
        name: File stem; ``.bin`` is appended.
        t: Tensor to serialize. Presumably must have a 2-byte element type
            (the bfloat16 tensors above do) for the int16 view to preserve
            the shape -- confirm before reusing with other dtypes.
    """
    path = os.path.join(out_dir, name + '.bin')
    # Viewing as int16 sidesteps NumPy conversion of bfloat16 (presumably
    # because NumPy has no native bfloat16 dtype) without touching the bits.
    raw = t.contiguous().cpu().view(torch.int16).numpy()
    # Context manager guarantees the handle is flushed and closed.
    with open(path, 'wb') as f:
        f.write(raw.tobytes())


dump('x', x)
dump('W', W)
dump('y_ref', y_ref)

# Record the dimensions alongside the headerless binary dumps.
with open(os.path.join(out_dir, 'shape.txt'), 'w') as f:
    f.write(f"N={N}\nD={D}\nOUT={OUT}\n")

print(f"N={N} D={D} OUT={OUT}, y_ref[0, :4] = {y_ref[0, :4].float().cpu().tolist()}")