"refactor: refactor_repeated_interfaces" (#32486)

Signed-off-by: tom-zju <tanjianpingzju1990@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
This commit is contained in:
tjp_zju
2026-01-18 22:07:01 +08:00
committed by GitHub
parent 38bf2ffb21
commit 2f03035a61
11 changed files with 43 additions and 77 deletions

View File

@@ -22,6 +22,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size,
)
from vllm.logger import init_logger
from vllm.lora.utils import is_moe_model
from vllm.model_executor.layers.fused_moe import FusedMoE
from vllm.model_executor.layers.linear import (
LinearBase,
@@ -52,11 +53,6 @@ from vllm.utils.torch_utils import set_default_torch_dtype
logger = init_logger(__name__)
def is_moe_model(model: torch.nn.Module) -> bool:
    """Report whether ``model`` contains at least one ``FusedMoE`` layer.

    Args:
        model: The module whose ``modules()`` iterator is scanned.

    Returns:
        ``True`` as soon as a module yielded by ``model.modules()`` is a
        ``FusedMoE`` instance, ``False`` if none is found.
    """
    for submodule in model.modules():
        # Stop at the first hit; there is no need to walk the rest.
        if isinstance(submodule, FusedMoE):
            return True
    return False
class BitsAndBytesModelLoader(BaseModelLoader):
"""Model loader to load model weights with BitsAndBytes quantization."""