[Model Runner V2] Support pooling models (#35120)

Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
Woosuk Kwon
2026-02-27 18:03:01 -08:00
committed by GitHub
parent 405f28d38d
commit 86ac7bcf84
6 changed files with 209 additions and 14 deletions

View File

@@ -700,6 +700,12 @@ class Worker(WorkerBase):
output = self.model_runner.execute_model(
scheduler_output, intermediate_tensors
)
if (
self.use_v2_model_runner
and self.model_runner.is_pooling_model
and output is None
):
output = self.model_runner.pool() # type: ignore
if isinstance(
output, ModelRunnerOutput | AsyncModelRunnerOutput | NoneType
):