[Cleanup] Remove no-longer-used SpeculativeConfig.enable_chunked_prefill (#27826)

Signed-off-by: Nick Hill <nhill@redhat.com>
2025-10-31 10:57:45 -07:00
parent fc16f1c477
commit 9e5bd3076e
3 changed files with 1 addition and 16 deletions
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -78,10 +78,6 @@ class SpeculativeConfig:
    draft_tensor_parallel_size: int | None = Field(default=None, ge=1)
    """The degree of the tensor parallelism for the draft model. Can only be 1
    or the same as the target model's tensor parallel size."""
-    disable_logprobs: bool = True
-    """If set to True, token log probabilities are not returned during
-    speculative decoding. If set to False, token log probabilities are returned
-    according to the log probability settings in SamplingParams."""

    # Draft model configuration
    quantization: me_quant.QuantizationMethods | None = None
@@ -126,12 +122,6 @@ class SpeculativeConfig:
    """The configuration of the target model."""
    target_parallel_config: SkipValidation[ParallelConfig] = None  # type: ignore
    """The parallel configuration for the target model."""
-    enable_chunked_prefill: SkipValidation[bool] = None  # type: ignore
-    """Whether vLLM is configured to use chunked prefill or not. Used for
-    raising an error since it's not yet compatible with speculative decode."""
-    disable_log_stats: SkipValidation[bool] = None  # type: ignore
-    """Whether to disable the periodic printing of stage times in speculative
-    decoding."""

    # params generated in the post-init stage
    draft_model_config: SkipValidation[ModelConfig] = None  # type: ignore