[Model Runner V2] Add config validation for not-yet-supported features (#38758)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Nick Hill
2026-04-03 12:08:08 -07:00
committed by GitHub
parent a5a623d961
commit 5f1de2b14b
2 changed files with 46 additions and 1 deletions

View File

@@ -78,7 +78,6 @@ steps:
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray"
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
# These require fix https://github.com/vllm-project/vllm/pull/36280
- label: Model Runner V2 Pipeline Parallelism (4 GPUs)
timeout_in_minutes: 60
working_dir: "/vllm-workspace/tests"

View File

@@ -1106,6 +1106,9 @@ class VllmConfig:
)
current_platform.check_and_update_config(self)
if envs.VLLM_USE_V2_MODEL_RUNNER:
self._validate_v2_model_runner()
# Re-compute compile ranges after platform-specific config updates
# (e.g., XPU may lower max_num_batched_tokens when MLA is enabled)
self._set_compile_ranges()
@@ -1729,6 +1732,49 @@ class VllmConfig:
f"kernel_config={self.kernel_config!r}"
)
def _validate_v2_model_runner(self) -> None:
    """Check for features not yet supported by the V2 model runner.

    Collects every enabled-but-unsupported feature into a single list and
    raises one ValueError naming all of them, so the user sees the full
    set of blockers at once rather than one per restart.

    Raises:
        ValueError: if any configured feature is incompatible with
            VLLM_USE_V2_MODEL_RUNNER.
    """
    blockers: list[str] = []
    model_cfg = self.model_config
    parallel_cfg = self.parallel_config
    spec_cfg = self.speculative_config

    if model_cfg is not None and model_cfg.has_inner_state:
        blockers.append("hybrid/mamba models")

    if parallel_cfg.prefill_context_parallel_size > 1:
        blockers.append("prefill context parallelism")

    # Only eagle/eagle3/mtp speculative methods are wired up so far.
    if spec_cfg is not None and spec_cfg.method not in ("eagle", "eagle3", "mtp"):
        blockers.append(f"speculative method '{spec_cfg.method}'")

    if parallel_cfg.enable_dbo:
        blockers.append("dual batch overlap")

    if model_cfg is not None and model_cfg.enable_return_routed_experts:
        # Will be added by https://github.com/vllm-project/vllm/pull/38163
        blockers.append("routed experts capture")

    if model_cfg is not None and model_cfg.logits_processors:
        blockers.append("custom logits processors")

    if self.cache_config.kv_sharing_fast_prefill:
        # Will be added by https://github.com/vllm-project/vllm/pull/35045
        blockers.append("KV sharing fast prefill")

    if self.ec_transfer_config is not None:
        # Will be added by https://github.com/vllm-project/vllm/pull/38390
        blockers.append("EC transfer")

    if blockers:
        raise ValueError(
            "VLLM_USE_V2_MODEL_RUNNER does not yet support: "
            + ", ".join(blockers)
        )
def validate_block_size(self) -> None:
"""Validate block_size against DCP and mamba constraints.