Create debug
#1
by Jennny - opened
debug
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
self._handle_gpu_memory_settings(gpu_mem)
|
| 2 |
+
File "/mnt/data/yiran/sglang/python/sglang/srt/server_args.py", line 1011, in _handle_gpu_memory_settings
|
| 3 |
+
if not self.use_mla_backend():
|
| 4 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
| 5 |
+
File "/mnt/data/yiran/sglang/python/sglang/srt/server_args.py", line 5124, in use_mla_backend
|
| 6 |
+
from sglang.srt.configs.model_config import AttentionArch
|
| 7 |
+
File "/mnt/data/yiran/sglang/python/sglang/srt/configs/model_config.py", line 27, in <module>
|
| 8 |
+
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
|
| 9 |
+
File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/__init__.py", line 19, in <module>
|
| 10 |
+
from sglang.srt.layers.quantization.auto_round import AutoRoundConfig
|
| 11 |
+
File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/auto_round.py", line 12, in <module>
|
| 12 |
+
from sglang.srt.layers.quantization.utils import get_scalar_types
|
| 13 |
+
File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/utils.py", line 13, in <module>
|
| 14 |
+
from sglang.srt.layers.quantization.fp8_kernel import scaled_fp8_quant
|
| 15 |
+
File "/mnt/data/yiran/sglang/python/sglang/srt/layers/quantization/fp8_kernel.py", line 50, in <module>
|
| 16 |
+
from sgl_kernel import sgl_per_token_quant_fp8
|
| 17 |
+
File "/mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/__init__.py", line 5, in <module>
|
| 18 |
+
common_ops = _load_architecture_specific_ops()
|
| 19 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 20 |
+
File "/mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/load_utils.py", line 188, in _load_architecture_specific_ops
|
| 21 |
+
raise ImportError(error_msg)
|
| 22 |
+
ImportError:
|
| 23 |
+
[sgl_kernel] CRITICAL: Could not load any common_ops library!
|
| 24 |
+
|
| 25 |
+
Attempted locations:
|
| 26 |
+
1. Architecture-specific pattern: /mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/sm100/common_ops.* - found files: ['/mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/sm100/common_ops.abi3.so']
|
| 27 |
+
2. Fallback pattern: /mnt/data/miniconda3/envs/slime/lib/python3.12/site-packages/sgl_kernel/common_ops.* - found files: []
|
| 28 |
+
3. Standard Python import: common_ops - failed
|
| 29 |
+
|
| 30 |
+
GPU Info:
|
| 31 |
+
- Compute capability: 80
|
| 32 |
+
- Expected variant: SM80 (precise math for compatibility)
|
| 33 |
+
|
| 34 |
+
Please ensure sgl_kernel is properly installed with:
|
| 35 |
+
pip install --upgrade sgl_kernel
|
| 36 |
+
|
| 37 |
+
Error details from previous import attempts:
|
| 38 |
+
- ImportError: libnuma.so.1: cannot open shared object file: No such file or directory
|
| 39 |
+
- ModuleNotFoundError: No module named 'common_ops'
|