diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..cb4ae273bb47fe3873611d6f094e9b637e4e103e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,100 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-xpu20251-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text 
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-xpu20252-x86_64-windows/rmsnorm/_rmsnorm_96c9886.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-xpu20253-x86_64-windows/rmsnorm/_rmsnorm_4cd2f5b.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_0f8f3b4.pyd 
filter=lfs diff=lfs merge=lfs -text +build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_2aa36b6.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text +build/torch211-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c8870a17590bc1bd03ed09dc4bdb14bccb86caf1 --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +--- +tags: +- kernels +- xpu +--- \ No newline at end of file diff --git a/build/torch210-cxx11-cpu-x86_64-linux/__init__.py b/build/torch210-cxx11-cpu-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c --- /dev/null +++ b/build/torch210-cxx11-cpu-x86_64-linux/__init__.py @@ -0,0 +1,27 @@ +from . import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"] + diff --git a/build/torch210-cxx11-cpu-x86_64-linux/_ops.py b/build/torch210-cxx11-cpu-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..f6b39cfa78e70e8f8a8ee88dcaa37942f199479e --- /dev/null +++ b/build/torch210-cxx11-cpu-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_cpu_1a02f6f +ops = torch.ops._rmsnorm_cpu_1a02f6f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace.
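+    Illustrative example (grounded in the f-string below; this build's
+    namespace is ``_rmsnorm_cpu_1a02f6f``):
+    ``add_op_namespace_prefix("apply_rms_norm")`` returns
+    ``"_rmsnorm_cpu_1a02f6f::apply_rms_norm"``.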
+ """ + return f"_rmsnorm_cpu_1a02f6f::{op_name}" diff --git a/build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so b/build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..2b8d25b32392b3987d6823b3d8e55fb28134b077 --- /dev/null +++ b/build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c102259696d99bbe9d4c686b4293195548faa4856123a358d44aab3d90148620 +size 2006072 diff --git a/build/torch210-cxx11-cpu-x86_64-linux/layers.py b/build/torch210-cxx11-cpu-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch210-cxx11-cpu-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. 
+ + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch210-cxx11-cpu-x86_64-linux/metadata.json b/build/torch210-cxx11-cpu-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eb22148b3f551be150f7824a5684c19bbc40ae0e --- /dev/null +++ b/build/torch210-cxx11-cpu-x86_64-linux/metadata.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cpu" + } +} \ No newline at end of file diff --git a/build/torch210-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py b/build/torch210-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py b/build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py @@ -0,0 +1,27 @@ +from . import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"] + diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py b/build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..b4dd5f183ce502642a8715282c0dddcb5e305f01 --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_xpu_1a02f6f +ops = torch.ops._rmsnorm_xpu_1a02f6f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace.
+ """ + return f"_rmsnorm_xpu_1a02f6f::{op_name}" diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so b/build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..c4362d7482afafc37105461800325dc8dcde2649 --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a87f0910ab215646183ecd9f4b2cbc5be6c72c3eee20d167f42f71c14629e65 +size 104793360 diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/layers.py b/build/torch210-cxx11-xpu20253-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. 
+ + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json b/build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b911d0a2549a35a1c65ab7e77d32e5aac23cd6ac --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "xpu" + } +} \ No newline at end of file diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py b/build/torch210-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-xpu20253-x86_64-windows/__init__.py b/build/torch210-xpu20253-x86_64-windows/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c1c7731922beab2e10baf849ad97324beb02c15 --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/__init__.py @@ -0,0 +1,27 @@ +from . import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"] + diff --git a/build/torch210-xpu20253-x86_64-windows/_ops.py b/build/torch210-xpu20253-x86_64-windows/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..dece9201fc5e1cac54dce23a2d64cbd7ca1859d2 --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_xpu_2aa36b6 +ops = torch.ops._rmsnorm_xpu_2aa36b6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace.
+ """ + return f"_rmsnorm_xpu_2aa36b6::{op_name}" diff --git a/build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_2aa36b6.pyd b/build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_2aa36b6.pyd new file mode 100644 index 0000000000000000000000000000000000000000..296a4c5a336e00b5f9c912564cd8f7e4a0003a5f --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_2aa36b6.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690752b7e809e03b7be6d8f5521080ea84115db1078cf6a0010597612e5844d7 +size 2363904 diff --git a/build/torch210-xpu20253-x86_64-windows/layers.py b/build/torch210-xpu20253-x86_64-windows/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..4401139637abaedc8e292bcc938ed17f3a6e1c89 --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. + + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch210-xpu20253-x86_64-windows/metadata.json b/build/torch210-xpu20253-x86_64-windows/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a5381dd80836f863378b9f33a559815688de9287 --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/metadata.json @@ -0,0 +1,5 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch210-xpu20253-x86_64-windows/rmsnorm/__init__.py b/build/torch210-xpu20253-x86_64-windows/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bc434ef44e63409acb52a8f3fff54a4adc46ed6a --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. 
So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cpu-x86_64-linux/__init__.py b/build/torch211-cxx11-cpu-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c --- /dev/null +++ b/build/torch211-cxx11-cpu-x86_64-linux/__init__.py @@ -0,0 +1,27 @@ +from . import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"] + diff --git a/build/torch211-cxx11-cpu-x86_64-linux/_ops.py b/build/torch211-cxx11-cpu-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..f6b39cfa78e70e8f8a8ee88dcaa37942f199479e --- /dev/null +++ b/build/torch211-cxx11-cpu-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_cpu_1a02f6f +ops = torch.ops._rmsnorm_cpu_1a02f6f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace.
+ """ + return f"_rmsnorm_cpu_1a02f6f::{op_name}" diff --git a/build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so b/build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..8d4c155c592327aef47b369b9f4b1b5003531435 --- /dev/null +++ b/build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439ac1a1bc4a6095844795cbccd7f2137c101bce3e3415bcebb3fd2b0dfcb97b +size 2001976 diff --git a/build/torch211-cxx11-cpu-x86_64-linux/layers.py b/build/torch211-cxx11-cpu-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch211-cxx11-cpu-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. 
+ + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch211-cxx11-cpu-x86_64-linux/metadata.json b/build/torch211-cxx11-cpu-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eb22148b3f551be150f7824a5684c19bbc40ae0e --- /dev/null +++ b/build/torch211-cxx11-cpu-x86_64-linux/metadata.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cpu" + } +} \ No newline at end of file diff --git a/build/torch211-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py b/build/torch211-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py b/build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py @@ -0,0 +1,27 @@ +from . import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"] + diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py b/build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..b4dd5f183ce502642a8715282c0dddcb5e305f01 --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_xpu_1a02f6f +ops = torch.ops._rmsnorm_xpu_1a02f6f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace.
+ """ + return f"_rmsnorm_xpu_1a02f6f::{op_name}" diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so b/build/torch211-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..31be8841d454fe9c33f5de2b2d4738593c2fd54f --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153aa232ee4f342e92075140aa796e86ccd2f55f07d27bcad90890ed2fac57bf +size 104793120 diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/layers.py b/build/torch211-cxx11-xpu20253-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. 
+ + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json b/build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b911d0a2549a35a1c65ab7e77d32e5aac23cd6ac --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "xpu" + } +} \ No newline at end of file diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py b/build/torch211-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__init__.py b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..de2235c56b91c957e1ba2c1b5dc189aa3682bc1b --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,14 @@ +from . 
import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + return ops.apply_rms_norm( + input, + weight, + eps, + ) + +__all__ = ["layers", "apply_rms_norm"] + diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8491c70660a5490fa811a066e5cade533975d4f6 Binary files /dev/null and b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..082997e4c8c09f173a9b3b1af712b717ffabea92 Binary files /dev/null and b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58001ddaeac4d0e2f8695d10b8a0d613b2a9919a Binary files /dev/null and b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_ops.py b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4ce583db41cbfd7783d2ea4bb1e9287e23a3bf3e --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_0d12ee5 +ops = torch.ops._rmsnorm_0d12ee5 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rmsnorm_0d12ee5::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..7ee0a01a1d2b5e7be2408d3028781311a0118238 --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79eb24cb07a24a3f829ce1d210bd0cbd79badd0cc236710a84e83c15575ddf04 +size 100963504 diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/layers.py b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..a78c858f9c1d2c18845e0a6ebae1c4327284b9dc --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/layers.py @@ -0,0 +1,36 @@ +import torch +from ._ops import ops + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. 
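+
+    Illustrative usage sketch (hypothetical setup; this forward-only build
+    calls the compiled op directly, so ``weight`` and ``variance_epsilon``
+    must be populated by the caller before use):
+
+        >>> norm = RMSNorm()
+        >>> norm.weight = torch.nn.Parameter(torch.ones(8))
+        >>> norm.variance_epsilon = 1e-6
+        >>> y = norm(torch.randn(2, 4, 8))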
+ """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. + + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return ops.apply_rms_norm( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch28-cxx11-cpu-x86_64-linux/__init__.py b/build/torch28-cxx11-cpu-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c --- /dev/null +++ b/build/torch28-cxx11-cpu-x86_64-linux/__init__.py @@ -0,0 +1,27 @@ +from . import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"] + diff --git a/build/torch28-cxx11-cpu-x86_64-linux/_ops.py b/build/torch28-cxx11-cpu-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..72eb9e39d1d8ec3d9ad8f07fd8d6dbcd034187d7 --- /dev/null +++ b/build/torch28-cxx11-cpu-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_235cde1 +ops = torch.ops._rmsnorm_235cde1 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rmsnorm_235cde1::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so b/build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..cb7d092ce04f021549684ad2a4f95e6de84c82f8 --- /dev/null +++ b/build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c92de9cefabeeadc60ffff87189a1e66ecb9ea19b343570ac55e9d9c7d98fe +size 156648 diff --git a/build/torch28-cxx11-cpu-x86_64-linux/layers.py b/build/torch28-cxx11-cpu-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch28-cxx11-cpu-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. 
+ offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. + + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch28-cxx11-cpu-x86_64-linux/metadata.json b/build/torch28-cxx11-cpu-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-cpu-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py b/build/torch28-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py @@ -0,0 +1,27 @@ +from . 
import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"] + diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py b/build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..72eb9e39d1d8ec3d9ad8f07fd8d6dbcd034187d7 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_235cde1 +ops = torch.ops._rmsnorm_235cde1 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rmsnorm_235cde1::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_235cde1.abi3.so b/build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_235cde1.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..28951a4be0835cb76bf825819a2c98916258e3b2 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_235cde1.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c4b43d63dc74b210633da81630023a6d6e359a7a1115bff55da9f4436053d9 +size 103700632 diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/layers.py b/build/torch28-cxx11-xpu20251-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor.
+ + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json b/build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rmsnorm/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cpu-x86_64-linux/__init__.py b/build/torch29-cxx11-cpu-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c --- /dev/null +++ b/build/torch29-cxx11-cpu-x86_64-linux/__init__.py @@ -0,0 +1,27 @@ +from . import layers + +from ._ops import ops + + +def apply_rms_norm(input, weight, eps): + # ops.apply_rms_norm returns [output, rstd] + return ops.apply_rms_norm( + input, + weight, + eps, + )[0] + +def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True): + return ops.apply_rms_norm_backward( + grad_output, + input, + weight, + output, + rstd, + eps, + input_requires_grad, + weight_requires_grad + ) + +__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"] + diff --git a/build/torch29-cxx11-cpu-x86_64-linux/_ops.py b/build/torch29-cxx11-cpu-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..fcfb7861dfb320e9e4ae32388904a8bea9ce7079 --- /dev/null +++ b/build/torch29-cxx11-cpu-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rmsnorm_cpu_b3d66c6 +ops = torch.ops._rmsnorm_cpu_b3d66c6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace.
+ """ + return f"_rmsnorm_cpu_b3d66c6::{op_name}" diff --git a/build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_b3d66c6.abi3.so b/build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_b3d66c6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..6d57c2727fb3d802d56b32e4ad541c093fe0e547 --- /dev/null +++ b/build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_b3d66c6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3b3a68445d97357b4c08dd07ed0d197d18c9e7449ad62172dd55dfc49e7d08 +size 1999776 diff --git a/build/torch29-cxx11-cpu-x86_64-linux/layers.py b/build/torch29-cxx11-cpu-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch29-cxx11-cpu-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. + + Args: + hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H) + + Returns: + torch.Tensor: Normalized tensor of the same shape as input + """ + return RMSNormFunction.apply( + hidden_states, + self.weight, + self.variance_epsilon, + ) + +__all__ = ["RMSNorm"] diff --git a/build/torch29-cxx11-cpu-x86_64-linux/metadata.json b/build/torch29-cxx11-cpu-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a5381dd80836f863378b9f33a559815688de9287 --- /dev/null +++ b/build/torch29-cxx11-cpu-x86_64-linux/metadata.json @@ -0,0 +1,5 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch29-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py b/build/torch29-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. 
+    # name from the hex-encoded hash of the absolute path instead of reusing
+    # the package name.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..37702991fadf27d757eba7cb11a50704006e4f9c
--- /dev/null
+++ b/build/torch29-cxx11-xpu20252-x86_64-linux/__init__.py
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]; keep only the output.
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"]
+
diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py b/build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..399c07d6a8ba93f4907028763b4b8967d50eb815
--- /dev/null
+++ b/build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_xpu_cec90b8
+ops = torch.ops._rmsnorm_xpu_cec90b8
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+ """ + return f"_rmsnorm_xpu_cec90b8::{op_name}" diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_cec90b8.abi3.so b/build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_cec90b8.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..fa48eb982c08dacaf4d75c447d3fc11b177d4f85 --- /dev/null +++ b/build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_cec90b8.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fb0e95d3b6be17bd03833abcf461bb10d9c62fbf1336d9226dce0950dce1fa +size 102179544 diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/layers.py b/build/torch29-cxx11-xpu20252-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ed828add3a21d3c45864ccf3d43123f1d3911a1c --- /dev/null +++ b/build/torch29-cxx11-xpu20252-x86_64-linux/layers.py @@ -0,0 +1,59 @@ +import torch +from ._ops import ops + +class RMSNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, weight, variance_epsilon): + ctx.variance_epsilon = variance_epsilon + output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon) + ctx.save_for_backward(hidden_states, weight, output, rstd) + return output + + @staticmethod + def backward(ctx, grad_output): + hidden_states, weight, output, rstd = ctx.saved_tensors + grads = ops.apply_rms_norm_backward( + grad_output, + hidden_states, + weight, + output, + rstd, + ctx.variance_epsilon, + ctx.needs_input_grad[0], + ctx.needs_input_grad[1] + ) + return grads[0], grads[1], None + +class RMSNorm(torch.nn.Module): + """ + RMSNorm module that uses the optimized LigerRMSNormFunction. + + Args: + hidden_size (int): The size of the hidden dimension. + eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6. + offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0. + casting_mode (str, optional): The casting mode to use. Defaults to "llama". + in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True. + """ + + + weight: torch.Tensor + variance_epsilon: float + + def forward(self, hidden_states): + """ + Apply RMS normalization to the input tensor. 
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json b/build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..b911d0a2549a35a1c65ab7e77d32e5aac23cd6ac
--- /dev/null
+++ b/build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json
@@ -0,0 +1,8 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "xpu"
+  }
+}
\ No newline at end of file
diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/rmsnorm/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/rmsnorm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch29-cxx11-xpu20252-x86_64-linux/rmsnorm/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is: after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we derive a unique module
+    # name from the hex-encoded hash of the absolute path instead of reusing
+    # the package name.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-xpu20252-x86_64-windows/metadata.json b/build/torch29-xpu20252-x86_64-windows/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8
--- /dev/null
+++ b/build/torch29-xpu20252-x86_64-windows/metadata.json
@@ -0,0 +1,4 @@
+{
+  "version": 1,
+  "python-depends": []
+}
\ No newline at end of file
diff --git a/build/torch29-xpu20252-x86_64-windows/rmsnorm/__init__.py b/build/torch29-xpu20252-x86_64-windows/rmsnorm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c1c7731922beab2e10baf849ad97324beb02c15
--- /dev/null
+++ b/build/torch29-xpu20252-x86_64-windows/rmsnorm/__init__.py
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]; keep only the output.
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm", "apply_rms_norm_backward"]
+
diff --git a/build/torch29-xpu20252-x86_64-windows/rmsnorm/_ops.py b/build/torch29-xpu20252-x86_64-windows/rmsnorm/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..572899747964300a538b85d5952f7aee04ea8c27
--- /dev/null
+++ b/build/torch29-xpu20252-x86_64-windows/rmsnorm/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_96c9886
+ops = torch.ops._rmsnorm_96c9886
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_rmsnorm_96c9886::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-xpu20252-x86_64-windows/rmsnorm/_rmsnorm_96c9886.pyd b/build/torch29-xpu20252-x86_64-windows/rmsnorm/_rmsnorm_96c9886.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..2813ba3019836fb28e60f27081e73a38a8a892cc
--- /dev/null
+++ b/build/torch29-xpu20252-x86_64-windows/rmsnorm/_rmsnorm_96c9886.pyd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0cfb67260dcf293c71463a698f1531e9d86fb497f9dcf86c296d612ffa4c142
+size 2379264
diff --git a/build/torch29-xpu20252-x86_64-windows/rmsnorm/layers.py b/build/torch29-xpu20252-x86_64-windows/rmsnorm/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..4401139637abaedc8e292bcc938ed17f3a6e1c89
--- /dev/null
+++ b/build/torch29-xpu20252-x86_64-windows/rmsnorm/layers.py
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    Root-mean-square layer normalization backed by the compiled RMSNormFunction op.
+
+    The module defines no __init__; `weight` and `variance_epsilon` are expected
+    to be set on the instance (e.g., copied from the layer being replaced).
+
+    Attributes:
+        weight (torch.Tensor): Learnable per-channel scale of shape (H,).
+        variance_epsilon (float): Epsilon added to the mean square for numerical stability.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
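
All of the build variants added above expose the same Python surface, so a single usage sketch covers them. The snippet below is illustrative and not part of the diff: it assumes one variant directory (e.g. build/torch29-cxx11-cpu-x86_64-linux) has been placed on sys.path so that its rmsnorm package is importable; the shapes, epsilon, and eager reference are chosen for the example only.

```python
# Minimal usage sketch (assumption: a built variant directory is on sys.path,
# so `import rmsnorm` resolves to one of the packages added in this diff).
import torch
import rmsnorm

H = 64
eps = 1e-6
x = torch.randn(2, 16, H)
weight = torch.ones(H)

# Functional entry point: the compiled op returns [output, rstd];
# the wrapper keeps only the normalized output.
y = rmsnorm.apply_rms_norm(x, weight, eps)

# Module entry point: RMSNorm has no __init__, so the weight and epsilon
# are attached to the instance before the first call.
norm = rmsnorm.layers.RMSNorm()
norm.weight = torch.nn.Parameter(weight.clone())
norm.variance_epsilon = eps

x_train = x.clone().requires_grad_(True)
out = norm(x_train)

# Cross-check against an eager RMSNorm: x * rsqrt(mean(x^2, dim=-1) + eps) * weight.
ref = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps) * weight
torch.testing.assert_close(y, ref, rtol=1e-4, atol=1e-4)

# The backward pass routes through RMSNormFunction.backward and the
# apply_rms_norm_backward op.
out.sum().backward()
assert x_train.grad.shape == x.shape
```

Note that the forward saves both `output` and `rstd` for the backward op, so the row-wise reduction does not have to be recomputed when gradients are taken, which is the usual design for fused norm kernels.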