"refactor: refactor_repeated_interfaces" (#32486)

Signed-off-by: tom-zju <tanjianpingzju1990@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
2026-01-18 22:07:01 +08:00
parent 38bf2ffb21
commit 2f03035a61
11 changed files with 43 additions and 77 deletions
--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -22,6 +22,7 @@ from vllm.distributed import (
    get_tensor_model_parallel_world_size,
 )
 from vllm.logger import init_logger
+from vllm.lora.utils import is_moe_model
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.linear import (
    LinearBase,
@@ -52,11 +53,6 @@ from vllm.utils.torch_utils import set_default_torch_dtype
 logger = init_logger(__name__)


-def is_moe_model(model: torch.nn.Module) -> bool:
-    """Checks if the model contains FusedMoE layers."""
-    return bool(any(isinstance(module, FusedMoE) for module in model.modules()))
-
-
 class BitsAndBytesModelLoader(BaseModelLoader):
    """Model loader to load model weights with BitsAndBytes quantization."""