[Model Runner V2] Support pooling models (#35120)
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
@@ -700,6 +700,12 @@ class Worker(WorkerBase):
         output = self.model_runner.execute_model(
             scheduler_output, intermediate_tensors
         )
+        if (
+            self.use_v2_model_runner
+            and self.model_runner.is_pooling_model
+            and output is None
+        ):
+            output = self.model_runner.pool() # type: ignore
         if isinstance(
             output, ModelRunnerOutput | AsyncModelRunnerOutput | NoneType
         ):
Reference in New Issue
Block a user