[Bugfix] Fix CUDA compatibility path setting for both datacenter and consumer NVIDIA GPUs (#33992)
Signed-off-by: Seungmin Kim <8457324+ehfd@users.noreply.github.com>
Signed-off-by: Andrew Mello <19512127+88plug@users.noreply.github.com>
Co-authored-by: 88plug <19512127+88plug@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
14
vllm/envs.py
14
vllm/envs.py
@@ -239,6 +239,8 @@ if TYPE_CHECKING:
|
||||
VLLM_WEIGHT_OFFLOADING_DISABLE_UVA: bool = False
|
||||
VLLM_DISABLE_LOG_LOGO: bool = False
|
||||
VLLM_LORA_DISABLE_PDL: bool = False
|
||||
VLLM_ENABLE_CUDA_COMPATIBILITY: bool = False
|
||||
VLLM_CUDA_COMPATIBILITY_PATH: str | None = None
|
||||
|
||||
|
||||
def get_default_cache_root():
|
||||
@@ -1591,6 +1593,16 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
# Disable PDL for LoRA, as enabling PDL with LoRA on SM100 causes
|
||||
# Triton compilation to fail.
|
||||
"VLLM_LORA_DISABLE_PDL": lambda: bool(int(os.getenv("VLLM_LORA_DISABLE_PDL", "0"))),
|
||||
# Enable CUDA compatibility mode for datacenter GPUs whose driver version
|
||||
# is older than the CUDA toolkit major version used by vLLM.
|
||||
"VLLM_ENABLE_CUDA_COMPATIBILITY": lambda: (
|
||||
os.environ.get("VLLM_ENABLE_CUDA_COMPATIBILITY", "0").strip().lower()
|
||||
in ("1", "true")
|
||||
),
|
||||
# Path to the CUDA compatibility libraries when CUDA compatibility is enabled.
|
||||
"VLLM_CUDA_COMPATIBILITY_PATH": lambda: os.environ.get(
|
||||
"VLLM_CUDA_COMPATIBILITY_PATH", None
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -1731,6 +1743,8 @@ def compile_factors() -> dict[str, object]:
|
||||
"VLLM_CPU_MOE_PREPACK",
|
||||
"VLLM_CPU_SGL_KERNEL",
|
||||
"VLLM_TEST_FORCE_LOAD_FORMAT",
|
||||
"VLLM_ENABLE_CUDA_COMPATIBILITY",
|
||||
"VLLM_CUDA_COMPATIBILITY_PATH",
|
||||
"LOCAL_RANK",
|
||||
"CUDA_VISIBLE_DEVICES",
|
||||
"NO_COLOR",
|
||||
|
||||
Reference in New Issue
Block a user