diff --git a/vllm/envs.py b/vllm/envs.py
index fb93cc7d7..d6243c02d 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -251,6 +251,7 @@ if TYPE_CHECKING:
     VLLM_LOG_MODEL_INSPECTION: bool = False
     VLLM_DEBUG_MFU_METRICS: bool = False
     VLLM_DISABLE_LOG_LOGO: bool = False
+    VLLM_LORA_DISABLE_PDL: bool = False
 
 
 def get_default_cache_root():
@@ -1618,8 +1619,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
     ),
     # Disable logging of vLLM logo at server startup time.
     "VLLM_DISABLE_LOG_LOGO": lambda: bool(int(os.getenv("VLLM_DISABLE_LOG_LOGO", "0"))),
+    # Disable PDL for LoRA, as enabling PDL with LoRA on SM100 causes
+    # Triton compilation to fail.
+    "VLLM_LORA_DISABLE_PDL": lambda: bool(int(os.getenv("VLLM_LORA_DISABLE_PDL", "0"))),
 }
+
 
 # --8<-- [end:env-vars-definition]
 
 
diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py
index f0cb9a5c0..39c175f30 100644
--- a/vllm/lora/ops/triton_ops/utils.py
+++ b/vllm/lora/ops/triton_ops/utils.py
@@ -310,4 +310,9 @@ def supports_pdl(device: torch.device | None = None) -> bool:
     Refer to: https://github.com/triton-lang/triton/blob/v3.5.0/python/tutorials/11-programmatic-dependent-launch.py
     """
     # PDL requires compute capability SM90 or above
-    return current_platform.is_cuda() and current_platform.has_device_capability(90)
+
+    return (
+        current_platform.is_cuda()
+        and current_platform.has_device_capability(90)
+        and not envs.VLLM_LORA_DISABLE_PDL
+    )