From aa08a30fc90248006ce6202496926f074149b08c Mon Sep 17 00:00:00 2001
From: Vincent Gimenes <147169146+VincentG1234@users.noreply.github.com>
Date: Mon, 23 Feb 2026 14:05:36 +0100
Subject: [PATCH] [CLEANING] Remove unused disable_by_batch_size from
 SpeculativeConfig (#35060)

Signed-off-by: Vincent Gimenes <vincent.gimenes@gmail.com>
---
 vllm/config/speculative.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py
index 207d8c2f6..29f0380d1 100644
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -101,9 +101,6 @@ class SpeculativeConfig:
     will use the default version."""
 
     # Advanced control
-    disable_by_batch_size: int | None = Field(default=None, ge=2)
-    """Disable speculative decoding for new incoming requests when the number
-    of enqueued requests is larger than this value, if provided."""
     disable_padded_drafter_batch: bool = False
     """Disable input padding for speculative decoding. If set to True,
     speculative input batches can contain sequences of different lengths,
@@ -707,13 +704,6 @@ class SpeculativeConfig:
                 self.draft_parallel_config
             )
 
-        if self.disable_by_batch_size is not None and self.disable_by_batch_size < 2:
-            raise ValueError(
-                "Expect the batch size threshold of disabling "
-                "speculative decoding is > 1, but got "
-                f"{self.disable_by_batch_size=}"
-            )
-
         eagle3_target_supported = [
             "llama",
             "qwen",