| self._handle_gpu_memory_settings(gpu_mem) |
| File "/mnt/data/yiran/sglang/python/sglang/srt/server_args.py", line 1011, in _handle_gpu_memory_settings |
| if not self.use_mla_backend(): |
| ^^^^^^^^^^^^^^^^^^^^^^ |
| File "/mnt/data/yiran/sglang/python/sglang/srt/server_args.py", line 5124, in use_mla_backend |
| from sglang.srt.configs.model_config import AttentionArch |
| File "/mnt/data/yiran/sglang/python/sglang/srt/configs/model_config.py", line 27, in <module> |
| from sglang.srt.layers.quantization import QUANTIZATION_METHODS |
| File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/__init__.py", line 19, in <module> |
| from sglang.srt.layers.quantization.auto_round import AutoRoundConfig |
| File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/auto_round.py", line 12, in <module> |
| from sglang.srt.layers.quantization.utils import get_scalar_types |
| File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/utils.py", line 13, in <module> |
| from sglang.srt.layers.quantization.fp8_kernel import scaled_fp8_quant |
| File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/fp8_kernel.py", line 50, in <module> |
| from sgl_kernel import sgl_per_token_quant_fp8 |
| File "/mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/__init__.py", line 5, in <module> |
| common_ops = _load_architecture_specific_ops() |
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| File "/mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/load_utils.py", line 188, in _load_architecture_specific_ops |
| raise ImportError(error_msg) |
| ImportError: |
| [sgl_kernel] CRITICAL: Could not load any common_ops library! |
|
|
| Attempted locations: |
| 1. Architecture-specific pattern: /mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/sm100/common_ops.* - found files: ['/mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/sm100/common_ops.abi3.so'] |
| 2. Fallback pattern: /mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/common_ops.* - found files: [] |
| 3. Standard Python import: common_ops - failed |
|
|
| GPU Info: |
| - Compute capability: 80 |
| - Expected variant: SM80 (precise math for compatibility) |
|
|
| Please ensure sgl_kernel is properly installed with: |
| pip install --upgrade sgl_kernel |
|
|
| Error details from previous import attempts: |
| - ImportError: libnuma.so.1: cannot open shared object file: No such file or directory |
| - ModuleNotFoundError: No module named 'common_ops' |