diff --git a/.buildkite/test_areas/model_runner_v2.yaml b/.buildkite/test_areas/model_runner_v2.yaml
index b39b00d0c..9bc02f543 100644
--- a/.buildkite/test_areas/model_runner_v2.yaml
+++ b/.buildkite/test_areas/model_runner_v2.yaml
@@ -78,7 +78,6 @@ steps:
     - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray"
     - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
 
-# These require fix https://github.com/vllm-project/vllm/pull/36280
 - label: Model Runner V2 Pipeline Parallelism (4 GPUs)
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/tests"
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 6551526d1..6229b44d5 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -1106,6 +1106,9 @@ class VllmConfig:
             )
         current_platform.check_and_update_config(self)
 
+        if envs.VLLM_USE_V2_MODEL_RUNNER:
+            self._validate_v2_model_runner()
+
         # Re-compute compile ranges after platform-specific config updates
         # (e.g., XPU may lower max_num_batched_tokens when MLA is enabled)
         self._set_compile_ranges()
@@ -1729,6 +1732,56 @@ class VllmConfig:
             f"kernel_config={self.kernel_config!r}"
         )
 
+    def _validate_v2_model_runner(self) -> None:
+        """Check for features not yet supported by the V2 model runner.
+
+        Every enabled-but-unsupported feature is collected first so that a
+        single error reports all of them together.
+
+        Raises:
+            ValueError: If at least one unsupported feature is enabled.
+        """
+        unsupported: list[str] = []
+
+        if self.model_config is not None and self.model_config.has_inner_state:
+            unsupported.append("hybrid/mamba models")
+
+        if self.parallel_config.prefill_context_parallel_size > 1:
+            unsupported.append("prefill context parallelism")
+
+        if (
+            self.speculative_config is not None
+            and self.speculative_config.method not in ("eagle", "eagle3", "mtp")
+        ):
+            unsupported.append(f"speculative method '{self.speculative_config.method}'")
+
+        if self.parallel_config.enable_dbo:
+            unsupported.append("dual batch overlap")
+
+        if (
+            self.model_config is not None
+            and self.model_config.enable_return_routed_experts
+        ):
+            # Will be added by https://github.com/vllm-project/vllm/pull/38163
+            unsupported.append("routed experts capture")
+
+        if self.model_config is not None and self.model_config.logits_processors:
+            unsupported.append("custom logits processors")
+
+        if self.cache_config.kv_sharing_fast_prefill:
+            # Will be added by https://github.com/vllm-project/vllm/pull/35045
+            unsupported.append("KV sharing fast prefill")
+
+        if self.ec_transfer_config is not None:
+            # Will be added by https://github.com/vllm-project/vllm/pull/38390
+            unsupported.append("EC transfer")
+
+        if unsupported:
+            raise ValueError(
+                "VLLM_USE_V2_MODEL_RUNNER does not yet support: "
+                + ", ".join(unsupported)
+            )
+
     def validate_block_size(self) -> None:
         """Validate block_size against DCP and mamba constraints.