[CLEANING] Remove unused disable_by_batch_size from SpeculativeConfig (#35060)

Signed-off-by: Vincent Gimenes <vincent.gimenes@gmail.com>
This commit is contained in:
Vincent Gimenes
2026-02-23 14:05:36 +01:00
committed by GitHub
parent 7f40e9e516
commit aa08a30fc9

View File

@@ -101,9 +101,6 @@ class SpeculativeConfig:
will use the default version."""
# Advanced control
disable_by_batch_size: int | None = Field(default=None, ge=2)
"""Disable speculative decoding for new incoming requests when the number
of enqueued requests is larger than this value, if provided."""
disable_padded_drafter_batch: bool = False
"""Disable input padding for speculative decoding. If set to True,
speculative input batches can contain sequences of different lengths,
@@ -707,13 +704,6 @@ class SpeculativeConfig:
self.draft_parallel_config
)
if self.disable_by_batch_size is not None and self.disable_by_batch_size < 2:
raise ValueError(
"Expect the batch size threshold of disabling "
"speculative decoding is > 1, but got "
f"{self.disable_by_batch_size=}"
)
eagle3_target_supported = [
"llama",
"qwen",