[Perf] Don't create unnecessary pooling params (#22876)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
Lucas Wilkinson
2025-08-14 08:28:09 -04:00
committed by GitHub
parent 540d54ca8d
commit 829b9a62d0

View File

@@ -341,13 +341,13 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
model_kwargs = dict[str, Any]()
num_reqs = self.input_batch.num_reqs
pooling_params = self.input_batch.pooling_metadata.pooling_params
num_pooling_reqs = len(pooling_params)
num_pooling_reqs = len(self.input_batch.pooling_params)
if num_pooling_reqs == 0:
return model_kwargs
pooling_params = self.input_batch.pooling_metadata.pooling_params
assert num_pooling_reqs == num_reqs
token_type_id_requests = dict[int, Any]()