From aa08a30fc90248006ce6202496926f074149b08c Mon Sep 17 00:00:00 2001 From: Vincent Gimenes <147169146+VincentG1234@users.noreply.github.com> Date: Mon, 23 Feb 2026 14:05:36 +0100 Subject: [PATCH] [CLEANING] Remove unused disable_by_batch_size from SpeculativeConfig (#35060) Signed-off-by: Vincent Gimenes --- vllm/config/speculative.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 207d8c2f6..29f0380d1 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -101,9 +101,6 @@ class SpeculativeConfig: will use the default version.""" # Advanced control - disable_by_batch_size: int | None = Field(default=None, ge=2) - """Disable speculative decoding for new incoming requests when the number - of enqueued requests is larger than this value, if provided.""" disable_padded_drafter_batch: bool = False """Disable input padding for speculative decoding. If set to True, speculative input batches can contain sequences of different lengths, @@ -707,13 +704,6 @@ class SpeculativeConfig: self.draft_parallel_config ) - if self.disable_by_batch_size is not None and self.disable_by_batch_size < 2: - raise ValueError( - "Expect the batch size threshold of disabling " - "speculative decoding is > 1, but got " - f"{self.disable_by_batch_size=}" - ) - eagle3_target_supported = [ "llama", "qwen",