diff --git a/tests/config/test_multimodal_config.py b/tests/config/test_multimodal_config.py
index 51bf93878..e5c30f999 100644
--- a/tests/config/test_multimodal_config.py
+++ b/tests/config/test_multimodal_config.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+from vllm.config.model import ModelConfig
 from vllm.config.multimodal import MultiModalConfig
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
@@ -23,3 +24,20 @@ def test_mm_encoder_attn_backend_hash_updates():
         mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN
     ).compute_hash()
     assert base_hash != overridden_hash
+
+
+def test_language_model_only_does_not_affect_mm_hash():
+    """language_model_only does not affect the ViT computation graph,
+    so it should not change the multimodal config hash."""
+    base_hash = MultiModalConfig().compute_hash()
+    lm_only_hash = MultiModalConfig(language_model_only=True).compute_hash()
+    assert base_hash == lm_only_hash
+
+
+def test_language_model_only_affects_model_hash():
+    """language_model_only affects the LM computation graph,
+    so it should change the model config hash."""
+    model = "llava-hf/llava-1.5-7b-hf"
+    base_hash = ModelConfig(model).compute_hash()
+    lm_only_hash = ModelConfig(model, language_model_only=True).compute_hash()
+    assert base_hash != lm_only_hash
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 0a5ff385f..1a39fb42e 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -357,6 +357,12 @@ class ModelConfig:
         from vllm.config.utils import get_hash_factors, hash_factors
 
         factors = get_hash_factors(self, ignored_factors)
+
+        # NOTE: For some models (e.g., Qwen3-VL), whether the MM code path is enabled
+        # affects the computation graph of the language model, therefore we add it
+        # here early.
+        if self.multimodal_config:
+            factors["language_model_only"] = self.multimodal_config.language_model_only
 
         return hash_factors(factors)
 
     def _update_nested(
diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py
index 7a10783e8..0a867f1c8 100644
--- a/vllm/config/multimodal.py
+++ b/vllm/config/multimodal.py
@@ -219,7 +219,6 @@ class MultiModalConfig:
         the final hidden states.
         """
         factors: list[Any] = [
-            self.language_model_only,
             self.mm_encoder_attn_backend.name
             if self.mm_encoder_attn_backend is not None
             else None,