[CLEANING] Remove unused disable_by_batch_size from SpeculativeConfig (#35060)
Signed-off-by: Vincent Gimenes <vincent.gimenes@gmail.com>
This commit is contained in:
@@ -101,9 +101,6 @@ class SpeculativeConfig:
|
||||
will use the default version."""
|
||||
|
||||
# Advanced control
|
||||
disable_by_batch_size: int | None = Field(default=None, ge=2)
|
||||
"""Disable speculative decoding for new incoming requests when the number
|
||||
of enqueued requests is larger than this value, if provided."""
|
||||
disable_padded_drafter_batch: bool = False
|
||||
"""Disable input padding for speculative decoding. If set to True,
|
||||
speculative input batches can contain sequences of different lengths,
|
||||
@@ -707,13 +704,6 @@ class SpeculativeConfig:
|
||||
self.draft_parallel_config
|
||||
)
|
||||
|
||||
if self.disable_by_batch_size is not None and self.disable_by_batch_size < 2:
|
||||
raise ValueError(
|
||||
"Expect the batch size threshold of disabling "
|
||||
"speculative decoding is > 1, but got "
|
||||
f"{self.disable_by_batch_size=}"
|
||||
)
|
||||
|
||||
eagle3_target_supported = [
|
||||
"llama",
|
||||
"qwen",
|
||||
|
||||
Reference in New Issue
Block a user