#!/usr/bin/env python3
"""Generate a linear (y = x @ W.T) reference for a realistic Qwen3 attention shape."""
import os, struct, torch, torch_npu
# Select NPU device 0 and fix the seed so repeated runs produce the same data.
torch.npu.set_device(0)
torch.manual_seed(7)

N = 5       # prompt length (rows of x)
D = 4096    # hidden size (input features)
OUT = 8192  # q_dim (output features)

# Random bf16 operands, moved to the NPU after creation.
# x is drawn before W; keep this order so the seeded RNG stream is unchanged.
x = torch.randn((N, D), dtype=torch.bfloat16).npu()
W = torch.randn((OUT, D), dtype=torch.bfloat16).npu()  # HF layout [out, in]

# Reference result of the linear layer: y = x @ W.T, shape [N, OUT].
y_ref = x @ W.t()

# Directory that receives the dumped tensors; created on demand.
out_dir = 'tests/mm_data'
os.makedirs(out_dir, exist_ok=True)
def dump(name, t, directory=None):
    """Write tensor *t* to ``<directory>/<name>.bin`` as raw 16-bit words.

    The tensor is made contiguous, copied to the CPU and reinterpreted as
    int16 before being written, so for a bfloat16 tensor the file holds the
    unmodified bf16 bit patterns in row-major order and host byte order.

    Args:
        name: File stem; ``'.bin'`` is appended.
        t: Tensor to serialize. Intended for 2-byte dtypes such as
            ``torch.bfloat16``.
        directory: Destination directory. Defaults to the module-level
            ``out_dir`` when omitted.
    """
    target_dir = out_dir if directory is None else directory
    p = os.path.join(target_dir, name + '.bin')
    # NumPy has no bfloat16 dtype, so reinterpret the bits as int16 first.
    # The resulting array is already int16; no further cast/copy is needed.
    a = t.contiguous().cpu().view(torch.int16).numpy()
    # Context manager guarantees the handle is flushed and closed.
    with open(p, 'wb') as f:
        f.write(a.tobytes())
# Persist both operands and the reference output, one .bin file per tensor.
for tensor_name, tensor in (('x', x), ('W', W), ('y_ref', y_ref)):
    dump(tensor_name, tensor)

# Record the problem dimensions alongside the tensors as KEY=VALUE lines.
shape_path = os.path.join(out_dir, 'shape.txt')
with open(shape_path, 'w') as shape_file:
    shape_file.write(f"N={N}\nD={D}\nOUT={OUT}\n")

# Echo the dimensions and the first four reference values as a quick sanity check.
y_head = y_ref[0, :4].float().cpu().tolist()
print(f"N={N} D={D} OUT={OUT}, y_ref[0, :4] = {y_head}")