[Bugfix] Fix CUDA compatibility path setting for both datacenter and consumer NVIDIA GPUs (#33992)

Signed-off-by: Seungmin Kim <8457324+ehfd@users.noreply.github.com>
Signed-off-by: Andrew Mello <19512127+88plug@users.noreply.github.com>
Co-authored-by: 88plug <19512127+88plug@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Author: Seungmin Kim
Date: 2026-02-26 11:15:51 +09:00
Committed by: GitHub
commit 160424a937 (parent 9511a3f8ee)
6 changed files with 334 additions and 5 deletions

vllm/envs.py

@@ -239,6 +239,8 @@ if TYPE_CHECKING:
     VLLM_WEIGHT_OFFLOADING_DISABLE_UVA: bool = False
     VLLM_DISABLE_LOG_LOGO: bool = False
     VLLM_LORA_DISABLE_PDL: bool = False
+    VLLM_ENABLE_CUDA_COMPATIBILITY: bool = False
+    VLLM_CUDA_COMPATIBILITY_PATH: str | None = None
 
 
 def get_default_cache_root():
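
The hunk above follows the usual vllm/envs.py pattern: annotations under TYPE_CHECKING give type checkers a default, while runtime reads go through lambdas registered in the environment_variables dict. A minimal sketch of that pattern, assuming a PEP 562 module-level __getattr__ hook does the dispatch (the hook itself is not part of this diff):

import os
from typing import TYPE_CHECKING, Any, Callable

if TYPE_CHECKING:
    # Static default seen by type checkers; never used at runtime.
    VLLM_ENABLE_CUDA_COMPATIBILITY: bool = False

environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_ENABLE_CUDA_COMPATIBILITY": lambda: (
        os.environ.get("VLLM_ENABLE_CUDA_COMPATIBILITY", "0").strip().lower()
        in ("1", "true")
    ),
}

def __getattr__(name: str) -> Any:
    # Assumed dispatch hook: evaluate the lambda on every attribute access,
    # so the value reflects the environment at read time, not import time.
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(name)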
@@ -1591,6 +1593,16 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # Disable PDL for LoRA, as enabling PDL with LoRA on SM100 causes
     # Triton compilation to fail.
     "VLLM_LORA_DISABLE_PDL": lambda: bool(int(os.getenv("VLLM_LORA_DISABLE_PDL", "0"))),
+    # Enable CUDA compatibility mode for datacenter GPUs whose driver is
+    # older than the CUDA toolkit major version vLLM was built against.
+    "VLLM_ENABLE_CUDA_COMPATIBILITY": lambda: (
+        os.environ.get("VLLM_ENABLE_CUDA_COMPATIBILITY", "0").strip().lower()
+        in ("1", "true")
+    ),
+    # Path to the CUDA compatibility libraries when CUDA compatibility is enabled.
+    "VLLM_CUDA_COMPATIBILITY_PATH": lambda: os.environ.get(
+        "VLLM_CUDA_COMPATIBILITY_PATH", None
+    ),
 }
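
Per the lambda above, the flag counts as enabled only for the values "1" and "true", case-insensitively and with surrounding whitespace stripped; anything else, including unset, leaves it disabled, and the path variable defaults to None. A usage sketch, assuming the conventional cuda-compat install path /usr/local/cuda/compat (the diff itself does not prescribe a path):

import os

# Values are read lazily on attribute access, so set them before first use.
os.environ["VLLM_ENABLE_CUDA_COMPATIBILITY"] = " True "  # stripped + lowered -> enabled
os.environ["VLLM_CUDA_COMPATIBILITY_PATH"] = "/usr/local/cuda/compat"  # assumed path

import vllm.envs as envs

assert envs.VLLM_ENABLE_CUDA_COMPATIBILITY is True
assert envs.VLLM_CUDA_COMPATIBILITY_PATH == "/usr/local/cuda/compat"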
@@ -1731,6 +1743,8 @@ def compile_factors() -> dict[str, object]:
         "VLLM_CPU_MOE_PREPACK",
         "VLLM_CPU_SGL_KERNEL",
         "VLLM_TEST_FORCE_LOAD_FORMAT",
+        "VLLM_ENABLE_CUDA_COMPATIBILITY",
+        "VLLM_CUDA_COMPATIBILITY_PATH",
         "LOCAL_RANK",
         "CUDA_VISIBLE_DEVICES",
         "NO_COLOR",
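
Listing the two names in compile_factors() means a change in either value alters the factors vLLM folds into its compilation cache key, so artifacts compiled under a different CUDA compatibility setup are not reused. An illustrative sketch of that idea, not the actual vLLM hashing code:

import hashlib
import json
import os

def cache_key(factor_names: list[str]) -> str:
    # Fold the current values of the named environment variables into a
    # stable digest; flipping CUDA compatibility mode (or its library path)
    # yields a different key, invalidating previously compiled artifacts.
    factors = {name: os.environ.get(name) for name in factor_names}
    payload = json.dumps(factors, sort_keys=True)
    return hashlib.sha256(payload.encode()).hexdigest()

print(cache_key(["VLLM_ENABLE_CUDA_COMPATIBILITY", "VLLM_CUDA_COMPATIBILITY_PATH"]))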