[Feature] Add SM103 (Blackwell Ultra) Support to vLLM (#30484)

Signed-off-by: LopezCastroRoberto <robertol.c510@gmail.com>
Signed-off-by: Roberto L. Castro <38211239+LopezCastroRoberto@users.noreply.github.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
Roberto L. Castro
2025-12-13 04:34:23 +01:00
committed by GitHub
parent 57e9bf1864
commit 4fa7ce46f3
21 changed files with 53 additions and 33 deletions

View File

@@ -118,19 +118,19 @@ def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
logger.info_once("Using FlashInfer MXFP4 BF16 backend for SM90")
return Mxfp4Backend.SM90_FI_MXFP4_BF16
elif (
current_platform.is_device_capability(100)
current_platform.is_device_capability_family(100)
and has_flashinfer()
and envs.VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS
):
logger.info_once("Using FlashInfer MXFP4 MXFP8 CUTLASS backend for SM100")
return Mxfp4Backend.SM100_FI_MXFP4_MXFP8_CUTLASS
elif (
current_platform.is_device_capability(100)
current_platform.is_device_capability_family(100)
and has_flashinfer()
and envs.VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8
):
return Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM
elif current_platform.is_device_capability(100) and has_flashinfer():
elif current_platform.is_device_capability_family(100) and has_flashinfer():
logger.info_once(
"Using FlashInfer MXFP4 BF16 backend for SM100, "
"For faster performance on SM100, consider setting "
@@ -139,7 +139,7 @@ def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
)
return Mxfp4Backend.SM100_FI_MXFP4_BF16
elif (
current_platform.is_device_capability(100)
current_platform.is_device_capability_family(100)
or current_platform.is_device_capability(90)
) and not has_flashinfer():
logger.warning_once(