feat: Enable engine-level arguments with speculators models (#25250)

Signed-off-by: Rahul Tuli <rtuli@redhat.com>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Rahul Tuli
2025-09-21 22:34:45 +05:30
committed by GitHub
parent 0ff8ebb2d7
commit c438b2951c
5 changed files with 128 additions and 85 deletions

View File

@@ -463,15 +463,29 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
return config
def maybe_override_with_speculators_target_model(
def maybe_override_with_speculators(
model: str,
tokenizer: str,
trust_remote_code: bool,
revision: Optional[str] = None,
vllm_speculative_config: Optional[dict[str, Any]] = None,
**kwargs,
) -> tuple[str, str]:
) -> tuple[str, str, Optional[dict[str, Any]]]:
"""
If running a speculators config, override running model with target model
Resolve model configuration when speculators are detected.
Checks if the provided model is a speculators model and if so, extracts
the target model configuration and builds the speculative config.
Args:
model: Model name or path
tokenizer: Tokenizer name or path
trust_remote_code: Whether to trust remote code
revision: Model revision
vllm_speculative_config: Existing vLLM speculative config
Returns:
Tuple of (resolved_model, resolved_tokenizer, speculative_config)
"""
is_gguf = check_gguf_file(model)
if is_gguf:
@@ -487,11 +501,27 @@ def maybe_override_with_speculators_target_model(
token=_get_hf_token(),
**kwargs,
)
spec_config = config_dict.get("speculators_config", None)
# Return the target model
if spec_config is not None:
model = tokenizer = spec_config["verifier"]["name_or_path"]
return model, tokenizer
speculators_config = config_dict.get("speculators_config")
if speculators_config is None:
# No speculators config found, return original values
return model, tokenizer, vllm_speculative_config
# Speculators format detected - process overrides
from vllm.transformers_utils.configs.speculators.base import (
SpeculatorsConfig)
vllm_speculative_config = SpeculatorsConfig.extract_vllm_speculative_config(
config_dict=config_dict)
# Set the draft model to the speculators model
vllm_speculative_config["model"] = model
# Override model and tokenizer with the verifier model from config
verifier_model = speculators_config["verifier"]["name_or_path"]
model = tokenizer = verifier_model
return model, tokenizer, vllm_speculative_config
def get_config(