TilelliLab
/

Tilelli-llm

Text Generation

small-language-model

mixture-of-experts

negative-results

reproducibility

Model card Files Files and versions

Tilelli-llm / src /tilelli /core /hadamard.py

TilelliLab's picture

Mirror small files (code, paper, results)

f86dc09 verified 16 days ago

History Blame Contribute Delete

2 kB

	"""tilelli.core.hadamard — orthogonal-rotation utilities for ternary quantization.

	Quantization-error reduction trick from QuaRot / SpinQuant (2024). Multiplying
	a weight matrix by an orthogonal matrix H spreads the energy of any single
	position across all positions, flattening outliers and producing a more
	Gaussian-like distribution that ternarizes with less rounding error.

	Sylvester construction works only for n = 2^k. For other sizes we fall
	back to a fixed-seed random orthogonal matrix (Householder/QR rotations),
	treated as equivalent in practice for quantization purposes.
	"""
	from __future__ import annotations

	import functools

	import torch
	from torch import Tensor


	def _is_power_of_two(n: int) -> bool:
	return n > 0 and (n & (n - 1)) == 0


	def _sylvester_hadamard(n: int) -> Tensor:
	if not _is_power_of_two(n):
	raise ValueError(f"Sylvester Hadamard requires power-of-2 size, got {n}")
	h = torch.tensor([[1.0]])
	while h.size(0) < n:
	top = torch.cat([h, h], dim=1)
	bot = torch.cat([h, -h], dim=1)
	h = torch.cat([top, bot], dim=0) / (2.0**0.5)
	return h


	def _random_orthogonal(n: int, seed: int = 1234) -> Tensor:
	g = torch.Generator(device="cpu").manual_seed(seed)
	a = torch.randn(n, n, generator=g, dtype=torch.float64)
	q, _r = torch.linalg.qr(a)
	return q.to(torch.float32)


	@functools.lru_cache(maxsize=64)
	def hadamard_matrix(n: int, seed: int = 1234) -> Tensor:
	if _is_power_of_two(n):
	return _sylvester_hadamard(n)
	return _random_orthogonal(n, seed=seed)


	def rotate_columns(w: Tensor, h: Tensor \| None = None) -> Tensor:
	n = w.size(-1)
	if h is None:
	h = hadamard_matrix(n).to(dtype=w.dtype, device=w.device)
	return w @ h


	def rotate_input(x: Tensor, n: int, h: Tensor \| None = None) -> Tensor:
	if h is None:
	h = hadamard_matrix(n).to(dtype=x.dtype, device=x.device)
	return x @ h


	__all__ = ["hadamard_matrix", "rotate_columns", "rotate_input"]