diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index ab1c384b9acc..fde26c213707 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -984,7 +984,8 @@ def _handle_model_specific_adjustments(self):
                 "aiter",
                 "triton",
                 "trtllm_mha",
-            }, "fa3, aiter, triton, or trtllm_mha is required for Llama4 model"
+                "intel_xpu",
+            }, "fa3, aiter, triton, trtllm_mha or intel_xpu is required for Llama4 model"
             if is_sm100_supported() and self.attention_backend is None:
                 self.attention_backend = "trtllm_mha"
                 logger.warning(
diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py
index 91e2e98cd410..6b802c11b77f 100644
--- a/python/sglang/srt/utils/common.py
+++ b/python/sglang/srt/utils/common.py
@@ -1831,7 +1831,8 @@ def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
         major, minor, *_ = torch.xpu.get_device_capability(device_id)["version"].split(
             "."
         )
-        major, minor = int(major), int(minor)
+        # Currently XPU version does not contain capability information.
+        major, minor = None, None
 
     if hasattr(torch, "hpu") and torch.hpu.is_available():
         try:
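
The first hunk widens the Llama4 attention-backend allow-list, so a server launched on an Intel XPU device with something like `python -m sglang.launch_server --model-path <llama4 checkpoint> --attention-backend intel_xpu` should now pass the assertion instead of failing at startup (the checkpoint path is illustrative; the flag names mirror the existing `server_args.py` options).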
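
The second hunk makes `get_device_capability` return `(None, None)` for XPU even though it is annotated `Tuple[int, int]`, so any caller that compares the result numerically needs a guard. A minimal sketch of such a guard, assuming a hypothetical caller `supports_sm80` that is not part of this patch:

```python
from sglang.srt.utils.common import get_device_capability


def supports_sm80(device_id: int = 0) -> bool:
    # Hypothetical caller, not part of the patch: shows how to handle the
    # (None, None) result now returned when no capability information is
    # available (e.g. on Intel XPU, per the second hunk above).
    major, minor = get_device_capability(device_id)
    if major is None or minor is None:
        # No capability information; treat SM 8.0 features as unavailable
        # rather than raising a TypeError on a None comparison.
        return False
    return (major, minor) >= (8, 0)
```

This also suggests the return annotation may eventually want widening to `Tuple[Optional[int], Optional[int]]` to match the new behavior.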