[Chore] Cleanup guided namespace, move to structured outputs config (#22772)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
commit 29283e8976 (parent 05b044e698)
Author: Aaron Pham
Date: 2025-09-18 05:20:27 -04:00
Committed by: GitHub
51 changed files with 579 additions and 806 deletions


@@ -2277,34 +2277,34 @@ def get_served_model_name(model: str,
     return served_model_name
-GuidedDecodingBackend = Literal["auto", "xgrammar", "guidance", "outlines",
-                                "lm-format-enforcer"]
+StructuredOutputsBackend = Literal["auto", "xgrammar", "guidance", "outlines",
+                                   "lm-format-enforcer"]
 @config
 @dataclass
-class DecodingConfig:
-    """Dataclass which contains the decoding strategy of the engine."""
+class StructuredOutputsConfig:
+    """Dataclass which contains structured outputs config for the engine."""
-    backend: GuidedDecodingBackend = "auto"
-    """Which engine will be used for guided decoding (JSON schema / regex etc)
-    by default. With "auto", we will make opinionated choices based on request
-    contents and what the backend libraries currently support, so the behavior
-    is subject to change in each release."""
+    backend: StructuredOutputsBackend = "auto"
+    """Which engine will be used for structured outputs (e.g. JSON schema,
+    regex, etc) by default. With "auto", we will make opinionated choices
+    based on request contents and what the backend libraries currently support,
+    so the behavior is subject to change in each release."""
     disable_fallback: bool = False
     """If `True`, vLLM will not fallback to a different backend on error."""
     disable_any_whitespace: bool = False
-    """If `True`, the model will not generate any whitespace during guided
-    decoding. This is only supported for xgrammar and guidance backends."""
+    """If `True`, the model will not generate any whitespace during structured
+    outputs. This is only supported for xgrammar and guidance backends."""
     disable_additional_properties: bool = False
     """If `True`, the `guidance` backend will not use `additionalProperties`
     in the JSON schema. This is only supported for the `guidance` backend and
     is used to better align its behaviour with `outlines` and `xgrammar`."""
-    reasoning_backend: str = ""
+    reasoning_parser: str = ""
     """Select the reasoning parser depending on the model that you're using.
     This is used to parse the reasoning content into OpenAI API format."""
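
The renamed dataclass keeps the same fields, with `reasoning_backend` becoming `reasoning_parser`. A minimal usage sketch, assuming the class is still importable from `vllm.config` and with illustrative values:

    # Minimal sketch; field names come from the hunk above, the import
    # path is assumed to be unchanged by this PR.
    from vllm.config import StructuredOutputsConfig

    so_config = StructuredOutputsConfig(
        backend="xgrammar",                   # one of the StructuredOutputsBackend literals
        disable_fallback=False,               # still fall back to another backend on error
        disable_any_whitespace=True,          # xgrammar/guidance backends only
        disable_additional_properties=False,  # guidance backend only
        reasoning_parser="deepseek_r1",       # illustrative parser name; was reasoning_backend
    )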
@@ -2451,8 +2451,9 @@ class VllmConfig:
"""LoRA configuration."""
speculative_config: Optional[SpeculativeConfig] = None
"""Speculative decoding configuration."""
decoding_config: DecodingConfig = field(default_factory=DecodingConfig)
"""Decoding configuration."""
structured_outputs_config: StructuredOutputsConfig = field(
default_factory=StructuredOutputsConfig)
"""Structured outputs configuration."""
observability_config: Optional[ObservabilityConfig] = None
"""Observability configuration."""
quant_config: Optional[QuantizationConfig] = None
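
For code that builds a `VllmConfig` directly, the visible change is only the field name. A hedged migration sketch (constructing a full `VllmConfig` may run extra post-init validation in a real install):

    from vllm.config import StructuredOutputsConfig, VllmConfig

    # Before: VllmConfig(decoding_config=DecodingConfig(backend="guidance"))
    # After:
    cfg = VllmConfig(
        structured_outputs_config=StructuredOutputsConfig(backend="guidance"))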
@@ -2543,8 +2544,8 @@ class VllmConfig:
             vllm_factors.append(self.speculative_config.compute_hash())
         else:
             vllm_factors.append("None")
-        if self.decoding_config:
-            vllm_factors.append(self.decoding_config.compute_hash())
+        if self.structured_outputs_config:
+            vllm_factors.append(self.structured_outputs_config.compute_hash())
         else:
             vllm_factors.append("None")
         if self.observability_config:
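
The cache-key logic keeps its shape: every optional sub-config contributes either its `compute_hash()` or the sentinel `"None"`, so the factor list stays positionally stable. A self-contained sketch of that contract (the hash body here is illustrative, not vLLM's actual implementation):

    import hashlib
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class StructuredOutputsConfig:
        backend: str = "auto"

        def compute_hash(self) -> str:
            # Illustrative: hash only the backend name to show the shape
            # of the contract; the real method folds in every factor.
            return hashlib.sha256(self.backend.encode()).hexdigest()

    def collect_factors(
            so_config: Optional[StructuredOutputsConfig]) -> list[str]:
        vllm_factors: list[str] = []
        if so_config:
            vllm_factors.append(so_config.compute_hash())
        else:
            # Keep a placeholder so factor positions (and thus the final
            # engine hash) stay comparable across configs.
            vllm_factors.append("None")
        return vllm_factors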
@@ -3063,7 +3064,7 @@ class VllmConfig:
f"enforce_eager={self.model_config.enforce_eager}, "
f"kv_cache_dtype={self.cache_config.cache_dtype}, "
f"device_config={self.device_config.device}, "
f"decoding_config={self.decoding_config!r}, "
f"structured_outputs_config={self.structured_outputs_config!r}, "
f"observability_config={self.observability_config!r}, "
f"seed={self.model_config.seed}, "
f"served_model_name={self.model_config.served_model_name}, "