diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 207d8c2f6..29f0380d1 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -101,9 +101,6 @@ class SpeculativeConfig: will use the default version.""" # Advanced control - disable_by_batch_size: int | None = Field(default=None, ge=2) - """Disable speculative decoding for new incoming requests when the number - of enqueued requests is larger than this value, if provided.""" disable_padded_drafter_batch: bool = False """Disable input padding for speculative decoding. If set to True, speculative input batches can contain sequences of different lengths, @@ -707,13 +704,6 @@ class SpeculativeConfig: self.draft_parallel_config ) - if self.disable_by_batch_size is not None and self.disable_by_batch_size < 2: - raise ValueError( - "Expect the batch size threshold of disabling " - "speculative decoding is > 1, but got " - f"{self.disable_by_batch_size=}" - ) - eagle3_target_supported = [ "llama", "qwen",