[V0 deprecation] Remove QKVCrossParallelLinear implementation (#26475)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2025-10-09 18:52:27 +08:00
parent ec10fd0abc
commit d1ddf340c8
4 changed files with 2 additions and 255 deletions
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -17,7 +17,6 @@ from vllm.attention import Attention
 from vllm.attention.layer import MLAAttention
 from vllm.config import ModelConfig, VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
-from vllm.model_executor.layers.linear import QKVCrossParallelLinear
 from vllm.model_executor.layers.quantization.base_config import (
    QuantizationConfig,
    QuantizeMethodBase,
@@ -108,11 +107,6 @@ def process_weights_after_loading(
    maybe_save_metadata_and_attributes_for_weight_reloading(model, model_config)

    for _, module in model.named_modules():
-        if isinstance(module, QKVCrossParallelLinear):
-            # NOTE(Isotr0py): special case for cross QKV layer because
-            # q and kv proj aren't registered as submodules intentionally
-            module.process_weights_after_loading()
-            continue
        quant_method = getattr(module, "quant_method", None)
        if isinstance(quant_method, QuantizeMethodBase):
            # When quant methods need to process weights after loading