Fix handling of max_num_batched_tokens for pooling tasks (#23004)

Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Author: Maximilien de Bayser
Date: 2025-08-16 14:36:30 -03:00
Committed by: GitHub
parent 829bbd7882
commit 52ce1420e9
2 changed files with 5 additions and 8 deletions

@@ -3600,9 +3600,6 @@ class VllmConfig:
                     logger.info(reason)
                 self.scheduler_config.chunked_prefill_enabled = False
                 self.scheduler_config.long_prefill_token_threshold = 0
-                self.scheduler_config.max_num_batched_tokens = max(
-                    self.scheduler_config.max_model_len,
-                    DEFAULT_MAX_NUM_BATCHED_TOKENS)
 
             if self.cache_config is not None:
                 self.cache_config.enable_prefix_caching = False
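
For context on the deleted lines: on this path (chunked prefill disabled for a pooling task), the old code forced max_num_batched_tokens up to max(max_model_len, DEFAULT_MAX_NUM_BATCHED_TOKENS), which could silently override a smaller, explicitly configured token budget. With the block removed, the scheduler's configured value is left untouched here. Below is a minimal standalone sketch of the removed behavior; the SchedulerConfig stand-in and the constant's value are illustrative assumptions, and only the max(...) expression comes from the diff itself:

    # Sketch of the override removed by this commit (simplified stand-in,
    # not the real vLLM classes).
    DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048  # assumed value, for illustration


    class SchedulerConfig:
        def __init__(self, max_model_len: int, max_num_batched_tokens: int):
            self.max_model_len = max_model_len
            self.max_num_batched_tokens = max_num_batched_tokens


    def old_pooling_fallback(scheduler_config: SchedulerConfig) -> None:
        # The pre-commit behavior, now deleted: unconditionally raise the
        # batched-token budget for pooling tasks.
        scheduler_config.max_num_batched_tokens = max(
            scheduler_config.max_model_len,
            DEFAULT_MAX_NUM_BATCHED_TOKENS)


    # A user who deliberately chose a small token budget for a pooling
    # (e.g. embedding) model would see it silently replaced:
    cfg = SchedulerConfig(max_model_len=512, max_num_batched_tokens=256)
    old_pooling_fallback(cfg)
    print(cfg.max_num_batched_tokens)  # 2048, not the requested 256

With the override gone, a deliberately small budget such as 256 survives this code path, while users who set nothing still get the scheduler's usual defaults from wherever they are normally applied.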