[Chore] Cleanup guided namespace, move to structured outputs config (#22772)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -2277,34 +2277,34 @@ def get_served_model_name(model: str,
|
||||
return served_model_name
|
||||
|
||||
|
||||
GuidedDecodingBackend = Literal["auto", "xgrammar", "guidance", "outlines",
|
||||
"lm-format-enforcer"]
|
||||
StructuredOutputsBackend = Literal["auto", "xgrammar", "guidance", "outlines",
|
||||
"lm-format-enforcer"]
|
||||
|
||||
|
||||
@config
|
||||
@dataclass
|
||||
class DecodingConfig:
|
||||
"""Dataclass which contains the decoding strategy of the engine."""
|
||||
class StructuredOutputsConfig:
|
||||
"""Dataclass which contains structured outputs config for the engine."""
|
||||
|
||||
backend: GuidedDecodingBackend = "auto"
|
||||
"""Which engine will be used for guided decoding (JSON schema / regex etc)
|
||||
by default. With "auto", we will make opinionated choices based on request
|
||||
contents and what the backend libraries currently support, so the behavior
|
||||
is subject to change in each release."""
|
||||
backend: StructuredOutputsBackend = "auto"
|
||||
"""Which engine will be used for structured outputs (e.g. JSON schema,
|
||||
regex, etc) by default. With "auto", we will make opinionated choices
|
||||
based on request contents and what the backend libraries currently support,
|
||||
so the behavior is subject to change in each release."""
|
||||
|
||||
disable_fallback: bool = False
|
||||
"""If `True`, vLLM will not fallback to a different backend on error."""
|
||||
|
||||
disable_any_whitespace: bool = False
|
||||
"""If `True`, the model will not generate any whitespace during guided
|
||||
decoding. This is only supported for xgrammar and guidance backends."""
|
||||
"""If `True`, the model will not generate any whitespace during structured
|
||||
outputs. This is only supported for xgrammar and guidance backends."""
|
||||
|
||||
disable_additional_properties: bool = False
|
||||
"""If `True`, the `guidance` backend will not use `additionalProperties`
|
||||
in the JSON schema. This is only supported for the `guidance` backend and
|
||||
is used to better align its behaviour with `outlines` and `xgrammar`."""
|
||||
|
||||
reasoning_backend: str = ""
|
||||
reasoning_parser: str = ""
|
||||
"""Select the reasoning parser depending on the model that you're using.
|
||||
This is used to parse the reasoning content into OpenAI API format."""
|
||||
|
||||
@@ -2451,8 +2451,9 @@ class VllmConfig:
|
||||
"""LoRA configuration."""
|
||||
speculative_config: Optional[SpeculativeConfig] = None
|
||||
"""Speculative decoding configuration."""
|
||||
decoding_config: DecodingConfig = field(default_factory=DecodingConfig)
|
||||
"""Decoding configuration."""
|
||||
structured_outputs_config: StructuredOutputsConfig = field(
|
||||
default_factory=StructuredOutputsConfig)
|
||||
"""Structured outputs configuration."""
|
||||
observability_config: Optional[ObservabilityConfig] = None
|
||||
"""Observability configuration."""
|
||||
quant_config: Optional[QuantizationConfig] = None
|
||||
@@ -2543,8 +2544,8 @@ class VllmConfig:
|
||||
vllm_factors.append(self.speculative_config.compute_hash())
|
||||
else:
|
||||
vllm_factors.append("None")
|
||||
if self.decoding_config:
|
||||
vllm_factors.append(self.decoding_config.compute_hash())
|
||||
if self.structured_outputs_config:
|
||||
vllm_factors.append(self.structured_outputs_config.compute_hash())
|
||||
else:
|
||||
vllm_factors.append("None")
|
||||
if self.observability_config:
|
||||
@@ -3063,7 +3064,7 @@ class VllmConfig:
|
||||
f"enforce_eager={self.model_config.enforce_eager}, "
|
||||
f"kv_cache_dtype={self.cache_config.cache_dtype}, "
|
||||
f"device_config={self.device_config.device}, "
|
||||
f"decoding_config={self.decoding_config!r}, "
|
||||
f"structured_outputs_config={self.structured_outputs_config!r}, "
|
||||
f"observability_config={self.observability_config!r}, "
|
||||
f"seed={self.model_config.seed}, "
|
||||
f"served_model_name={self.model_config.served_model_name}, "
|
||||
|
||||
Reference in New Issue
Block a user