[Bugfix] Fix CUDA compatibility path setting for both datacenter and consumer NVIDIA GPUs (#33992)

Signed-off-by: Seungmin Kim <8457324+ehfd@users.noreply.github.com>
Signed-off-by: Andrew Mello <19512127+88plug@users.noreply.github.com>
Co-authored-by: 88plug <19512127+88plug@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
2026-02-26 11:15:51 +09:00
parent 9511a3f8ee
commit 160424a937
6 changed files with 334 additions and 5 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -239,6 +239,8 @@ if TYPE_CHECKING:
    VLLM_WEIGHT_OFFLOADING_DISABLE_UVA: bool = False
    VLLM_DISABLE_LOG_LOGO: bool = False
    VLLM_LORA_DISABLE_PDL: bool = False
+    VLLM_ENABLE_CUDA_COMPATIBILITY: bool = False
+    VLLM_CUDA_COMPATIBILITY_PATH: str | None = None


 def get_default_cache_root():
@@ -1591,6 +1593,16 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # Disable PDL for LoRA, as enabling PDL with LoRA on SM100 causes
    # Triton compilation to fail.
    "VLLM_LORA_DISABLE_PDL": lambda: bool(int(os.getenv("VLLM_LORA_DISABLE_PDL", "0"))),
+    # Enable CUDA compatibility mode for datacenter GPUs whose driver is
+    # older than the CUDA toolkit major version used by vLLM.
+    "VLLM_ENABLE_CUDA_COMPATIBILITY": lambda: (
+        os.environ.get("VLLM_ENABLE_CUDA_COMPATIBILITY", "0").strip().lower()
+        in ("1", "true")
+    ),
+    # Path to the CUDA compatibility libraries when CUDA compatibility is enabled.
+    "VLLM_CUDA_COMPATIBILITY_PATH": lambda: os.environ.get(
+        "VLLM_CUDA_COMPATIBILITY_PATH", None
+    ),
 }


@@ -1731,6 +1743,8 @@ def compile_factors() -> dict[str, object]:
        "VLLM_CPU_MOE_PREPACK",
        "VLLM_CPU_SGL_KERNEL",
        "VLLM_TEST_FORCE_LOAD_FORMAT",
+        "VLLM_ENABLE_CUDA_COMPATIBILITY",
+        "VLLM_CUDA_COMPATIBILITY_PATH",
        "LOCAL_RANK",
        "CUDA_VISIBLE_DEVICES",
        "NO_COLOR",