kernels-bot committed on
Commit
c88dadb
·
verified ·
1 Parent(s): 9441019

Uploaded using `kernel-builder`.

Browse files
.gitattributes CHANGED
@@ -38,3 +38,5 @@ build/torch28-metal-aarch64-darwin/_mlx_quantization_metal_kernels_33fa8c7.abi3.
38
  build/torch29-metal-aarch64-darwin/_mlx_quantization_metal_kernels_33fa8c7.abi3.so filter=lfs diff=lfs merge=lfs -text
39
  build/torch210-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_0aaded7.abi3.so filter=lfs diff=lfs merge=lfs -text
40
  build/torch29-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_0aaded7.abi3.so filter=lfs diff=lfs merge=lfs -text
 
 
 
38
  build/torch29-metal-aarch64-darwin/_mlx_quantization_metal_kernels_33fa8c7.abi3.so filter=lfs diff=lfs merge=lfs -text
39
  build/torch210-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_0aaded7.abi3.so filter=lfs diff=lfs merge=lfs -text
40
  build/torch29-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_0aaded7.abi3.so filter=lfs diff=lfs merge=lfs -text
41
+ build/torch210-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_86f75d9.abi3.so filter=lfs diff=lfs merge=lfs -text
42
+ build/torch211-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_86f75d9.abi3.so filter=lfs diff=lfs merge=lfs -text
build/torch210-metal-aarch64-darwin/{_mlx_quantization_metal_kernels_metal_0aaded7.abi3.so → _mlx_quantization_metal_kernels_metal_86f75d9.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f3cd1fc44fc6b52d4ec5bc275cd8ebbc17bc247256ff0063cf0dc70a7f0e57b
3
- size 40027032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf762ec1255ee6555cb163b30c6bfa6abef2425c04982f908aba65899b4d2f5f
3
+ size 40010744
build/torch210-metal-aarch64-darwin/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _mlx_quantization_metal_kernels_metal_0aaded7
3
- ops = torch.ops._mlx_quantization_metal_kernels_metal_0aaded7
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_mlx_quantization_metal_kernels_metal_0aaded7::{op_name}"
 
1
  import torch
2
+ from . import _mlx_quantization_metal_kernels_metal_86f75d9
3
+ ops = torch.ops._mlx_quantization_metal_kernels_metal_86f75d9
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_mlx_quantization_metal_kernels_metal_86f75d9::{op_name}"
build/torch210-metal-aarch64-darwin/metadata.json CHANGED
@@ -1,4 +1,10 @@
1
  {
 
 
 
2
  "license": "MIT",
3
- "python-depends": []
4
- }
 
 
 
 
1
  {
2
+ "name": "mlx-quantization-metal-kernels",
3
+ "id": "_mlx_quantization_metal_kernels_metal_86f75d9",
4
+ "version": 1,
5
  "license": "MIT",
6
+ "python-depends": [],
7
+ "backend": {
8
+ "type": "metal"
9
+ }
10
+ }
build/torch210-metal-aarch64-darwin/mlx_quantization_metal_kernels/__init__.py CHANGED
@@ -1,10 +1,10 @@
1
  import ctypes
 
2
  import sys
3
-
4
- import importlib
5
  from pathlib import Path
6
  from types import ModuleType
7
 
 
8
  def _import_from_path(file_path: Path) -> ModuleType:
9
  # We cannot use the module name as-is, after adding it to `sys.modules`,
10
  # it would also be used for other imports. So, we make a module name that
 
1
  import ctypes
2
+ import importlib.util
3
  import sys
 
 
4
  from pathlib import Path
5
  from types import ModuleType
6
 
7
+
8
  def _import_from_path(file_path: Path) -> ModuleType:
9
  # We cannot use the module name as-is, after adding it to `sys.modules`,
10
  # it would also be used for other imports. So, we make a module name that
build/{torch28-metal-aarch64-darwin → torch211-metal-aarch64-darwin}/__init__.py RENAMED
File without changes
build/{torch28-metal-aarch64-darwin/_mlx_quantization_metal_kernels_33fa8c7.abi3.so → torch211-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_86f75d9.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c84d137061b400ae30b4822a0bace36f211301162e85e21d5dc1d4a3e0a415d6
3
- size 40042432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0bab012de037b761f8ecac05f87c017c1aa32bf7a09187164c4193e77237222
3
+ size 40010744
build/{torch29-metal-aarch64-darwin → torch211-metal-aarch64-darwin}/_ops.py RENAMED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _mlx_quantization_metal_kernels_metal_0aaded7
3
- ops = torch.ops._mlx_quantization_metal_kernels_metal_0aaded7
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_mlx_quantization_metal_kernels_metal_0aaded7::{op_name}"
 
1
  import torch
2
+ from . import _mlx_quantization_metal_kernels_metal_86f75d9
3
+ ops = torch.ops._mlx_quantization_metal_kernels_metal_86f75d9
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_mlx_quantization_metal_kernels_metal_86f75d9::{op_name}"
build/torch211-metal-aarch64-darwin/metadata.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "mlx-quantization-metal-kernels",
3
+ "id": "_mlx_quantization_metal_kernels_metal_86f75d9",
4
+ "version": 1,
5
+ "license": "MIT",
6
+ "python-depends": [],
7
+ "backend": {
8
+ "type": "metal"
9
+ }
10
+ }
build/{torch28-metal-aarch64-darwin → torch211-metal-aarch64-darwin}/mlx_quantization_metal_kernels/__init__.py RENAMED
@@ -1,10 +1,10 @@
1
  import ctypes
 
2
  import sys
3
-
4
- import importlib
5
  from pathlib import Path
6
  from types import ModuleType
7
 
 
8
  def _import_from_path(file_path: Path) -> ModuleType:
9
  # We cannot use the module name as-is, after adding it to `sys.modules`,
10
  # it would also be used for other imports. So, we make a module name that
 
1
  import ctypes
2
+ import importlib.util
3
  import sys
 
 
4
  from pathlib import Path
5
  from types import ModuleType
6
 
7
+
8
  def _import_from_path(file_path: Path) -> ModuleType:
9
  # We cannot use the module name as-is, after adding it to `sys.modules`,
10
  # it would also be used for other imports. So, we make a module name that
build/torch28-metal-aarch64-darwin/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _mlx_quantization_metal_kernels_33fa8c7
3
- ops = torch.ops._mlx_quantization_metal_kernels_33fa8c7
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_mlx_quantization_metal_kernels_33fa8c7::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch28-metal-aarch64-darwin/metadata.json DELETED
@@ -1,3 +0,0 @@
1
- {
2
- "python-depends": []
3
- }
 
 
 
 
build/torch29-metal-aarch64-darwin/__init__.py DELETED
@@ -1,162 +0,0 @@
1
- from typing import Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # =============================================================================
9
- # FP-quantized (MXFP4) operations
10
- # =============================================================================
11
-
12
-
13
- def mxfp4_qmm_n(
14
- x: torch.Tensor,
15
- w: torch.Tensor,
16
- scales: torch.Tensor,
17
- output_features: int,
18
- ) -> torch.Tensor:
19
- """Matrix-matrix multiply with MXFP4 quantized non-transposed weight.
20
-
21
- Computes y = x @ dequantize(w, scales).
22
- x: [..., M, K], w: [K_packed, N_packed] (uint32), y: [..., M, output_features]
23
- """
24
- return ops.mxfp4_qmm_n(x, w, scales, output_features)
25
-
26
-
27
- def mxfp4_qmv(
28
- x: torch.Tensor,
29
- w: torch.Tensor,
30
- scales: torch.Tensor,
31
- output_features: int,
32
- ) -> torch.Tensor:
33
- """Matrix-vector multiply with MXFP4 quantized weight.
34
-
35
- Computes y = dequantize(w, scales) @ x.
36
- x: [..., K], w: [N, K_packed] (uint32), y: [..., output_features]
37
- """
38
- return ops.mxfp4_qmv(x, w, scales, output_features)
39
-
40
-
41
- # =============================================================================
42
- # Affine quantized operations (scales + biases)
43
- # =============================================================================
44
-
45
-
46
- def affine_qmv(
47
- x: torch.Tensor,
48
- w: torch.Tensor,
49
- scales: torch.Tensor,
50
- biases: torch.Tensor,
51
- output_features: int,
52
- group_size: int = 128,
53
- bits: int = 4,
54
- ) -> torch.Tensor:
55
- """Matrix-vector multiply with affine quantized weight.
56
-
57
- x: [..., K], w: [N, K_packed], y: [..., output_features]
58
- """
59
- return ops.affine_qmv(x, w, scales, biases, group_size, bits, output_features)
60
-
61
-
62
- def affine_qmm_t(
63
- x: torch.Tensor,
64
- w: torch.Tensor,
65
- scales: torch.Tensor,
66
- biases: torch.Tensor,
67
- group_size: int = 128,
68
- bits: int = 4,
69
- ) -> torch.Tensor:
70
- """Matrix-matrix multiply with affine quantized transposed weight.
71
-
72
- Computes y = x @ dequantize(w, scales, biases).T
73
- x: [..., M, K], w: [N, K_packed], y: [..., M, N]
74
- N is inferred from w.size(0).
75
- """
76
- return ops.affine_qmm_t(x, w, scales, biases, group_size, bits)
77
-
78
-
79
- def affine_qmm_n(
80
- x: torch.Tensor,
81
- w: torch.Tensor,
82
- scales: torch.Tensor,
83
- biases: torch.Tensor,
84
- output_features: int,
85
- group_size: int = 128,
86
- bits: int = 4,
87
- ) -> torch.Tensor:
88
- """Matrix-matrix multiply with affine quantized non-transposed weight.
89
-
90
- Computes y = x @ dequantize(w, scales, biases)
91
- x: [..., M, K], w: [K_packed, N_packed], y: [..., M, output_features]
92
- """
93
- return ops.affine_qmm_n(x, w, scales, biases, group_size, bits, output_features)
94
-
95
-
96
- # =============================================================================
97
- # Affine quantized NAX operations (MetalPerformancePrimitives accelerated)
98
- # =============================================================================
99
-
100
-
101
- def affine_qmm_t_nax(
102
- x: torch.Tensor,
103
- w: torch.Tensor,
104
- scales: torch.Tensor,
105
- biases: torch.Tensor,
106
- group_size: int = 128,
107
- bits: int = 4,
108
- ) -> torch.Tensor:
109
- """NAX-accelerated matrix-matrix multiply with transposed quantized weight.
110
-
111
- x: [..., M, K], w: [N, K_packed], y: [..., M, N]
112
- """
113
- return ops.affine_qmm_t_nax(x, w, scales, biases, group_size, bits)
114
-
115
-
116
- def affine_qmm_n_nax(
117
- x: torch.Tensor,
118
- w: torch.Tensor,
119
- scales: torch.Tensor,
120
- biases: torch.Tensor,
121
- output_features: int,
122
- group_size: int = 128,
123
- bits: int = 4,
124
- ) -> torch.Tensor:
125
- """NAX-accelerated matrix-matrix multiply with non-transposed quantized weight.
126
-
127
- x: [..., M, K], w: [K_packed, N_packed], y: [..., M, output_features]
128
- """
129
- return ops.affine_qmm_n_nax(x, w, scales, biases, group_size, bits, output_features)
130
-
131
-
132
- def affine_gather_qmm_rhs_nax(
133
- x: torch.Tensor,
134
- w: torch.Tensor,
135
- scales: torch.Tensor,
136
- biases: torch.Tensor,
137
- indices: torch.Tensor,
138
- output_features: int,
139
- group_size: int = 128,
140
- bits: int = 4,
141
- transpose: bool = True,
142
- ) -> torch.Tensor:
143
- """NAX-accelerated gather + matrix-matrix multiply.
144
-
145
- Gathers weight rows using indices, then computes matmul.
146
- x: [M, K], w: [num_experts, ...], indices: [M], y: [M, output_features]
147
- """
148
- return ops.affine_gather_qmm_rhs_nax(
149
- x, w, scales, biases, indices, group_size, bits, output_features, transpose
150
- )
151
-
152
-
153
- __all__ = [
154
- "mxfp4_qmm_n",
155
- "mxfp4_qmv",
156
- "affine_qmv",
157
- "affine_qmm_t",
158
- "affine_qmm_n",
159
- "affine_qmm_t_nax",
160
- "affine_qmm_n_nax",
161
- "affine_gather_qmm_rhs_nax",
162
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-metal-aarch64-darwin/_mlx_quantization_metal_kernels_metal_0aaded7.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:df7ad3fa1ac77f9dc330bd7411b0168552dcaa25a5c6e91b61ed0b37c90b2a9f
3
- size 40042616
 
 
 
 
build/torch29-metal-aarch64-darwin/metadata.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "license": "MIT",
3
- "python-depends": []
4
- }
 
 
 
 
 
build/torch29-metal-aarch64-darwin/mlx_quantization_metal_kernels/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))