diff --git a/vllm/model_executor/kernels/linear/__init__.py b/vllm/model_executor/kernels/linear/__init__.py index 1b4b7dc88..79afc8b37 100644 --- a/vllm/model_executor/kernels/linear/__init__.py +++ b/vllm/model_executor/kernels/linear/__init__.py @@ -13,7 +13,6 @@ or kernel implementation, add it to this __init__.py to maintain import stability. """ -import os from typing import TypeVar import torch @@ -154,8 +153,7 @@ _KernelConfigT = TypeVar("_KernelConfigT", bound=ScaledMMLinearLayerConfig) def is_supported_and_can_implement_kernel( kernel: type[_KernelT], config: _KernelConfigT, compute_capability: int | None ) -> tuple[bool, str]: - # TODO: Fetch `VLLM_DISABLED_KERNELS` from vllm.envs instead. - if kernel.__name__ in os.environ.get("VLLM_DISABLED_KERNELS", "").split(","): + if kernel.__name__ in envs.VLLM_DISABLED_KERNELS: return False, f" {kernel.__name__} is disabled by environment variable" if compute_capability is None: