[Model Runner V2] Add config validation for not-yet-supported features (#38758)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -78,7 +78,6 @@ steps:
|
||||
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray"
|
||||
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
|
||||
|
||||
# These require the fix in https://github.com/vllm-project/vllm/pull/36280
|
||||
- label: Model Runner V2 Pipeline Parallelism (4 GPUs)
|
||||
timeout_in_minutes: 60
|
||||
working_dir: "/vllm-workspace/tests"
|
||||
|
||||
@@ -1106,6 +1106,9 @@ class VllmConfig:
|
||||
)
|
||||
current_platform.check_and_update_config(self)
|
||||
|
||||
if envs.VLLM_USE_V2_MODEL_RUNNER:
|
||||
self._validate_v2_model_runner()
|
||||
|
||||
# Re-compute compile ranges after platform-specific config updates
|
||||
# (e.g., XPU may lower max_num_batched_tokens when MLA is enabled)
|
||||
self._set_compile_ranges()
|
||||
@@ -1729,6 +1732,49 @@ class VllmConfig:
|
||||
f"kernel_config={self.kernel_config!r}"
|
||||
)
|
||||
|
||||
def _validate_v2_model_runner(self) -> None:
    """Reject configurations that use features the V2 model runner lacks.

    Every offending feature is gathered first so that a single error
    reports all of them together, in the order the checks are listed.

    Raises:
        ValueError: If at least one unsupported feature is enabled.
    """
    model_cfg = self.model_config
    parallel_cfg = self.parallel_config
    spec_cfg = self.speculative_config
    supported_spec_methods = ("eagle", "eagle3", "mtp")

    # Each entry pairs "is this feature in use?" with its human-readable
    # label. Entries are evaluated top to bottom, which fixes both the
    # attribute access order and the order of labels in the error message.
    checks = (
        (
            model_cfg is not None and model_cfg.has_inner_state,
            "hybrid/mamba models",
        ),
        (
            parallel_cfg.prefill_context_parallel_size > 1,
            "prefill context parallelism",
        ),
        (
            spec_cfg is not None
            and spec_cfg.method not in supported_spec_methods,
            # The label is only meaningful (and only used) when a
            # speculative config is present.
            "" if spec_cfg is None
            else f"speculative method '{spec_cfg.method}'",
        ),
        (
            parallel_cfg.enable_dbo,
            "dual batch overlap",
        ),
        (
            # Will be added by https://github.com/vllm-project/vllm/pull/38163
            model_cfg is not None and model_cfg.enable_return_routed_experts,
            "routed experts capture",
        ),
        (
            model_cfg is not None and model_cfg.logits_processors,
            "custom logits processors",
        ),
        (
            # Will be added by https://github.com/vllm-project/vllm/pull/35045
            self.cache_config.kv_sharing_fast_prefill,
            "KV sharing fast prefill",
        ),
        (
            # Will be added by https://github.com/vllm-project/vllm/pull/38390
            self.ec_transfer_config is not None,
            "EC transfer",
        ),
    )

    offending = [label for in_use, label in checks if in_use]
    if not offending:
        return

    message = "VLLM_USE_V2_MODEL_RUNNER does not yet support: " + ", ".join(
        offending
    )
    raise ValueError(message)
|
||||
|
||||
def validate_block_size(self) -> None:
|
||||
"""Validate block_size against DCP and mamba constraints.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user