[Chore] Cleanup guided namespace, move to structured outputs config (#22772)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2025-09-18 05:20:27 -04:00
committed by GitHub
parent 05b044e698
commit 29283e8976
51 changed files with 579 additions and 806 deletions

View File

@@ -45,7 +45,7 @@ class Processor:
self.model_config = vllm_config.model_config
self.cache_config = vllm_config.cache_config
self.lora_config = vllm_config.lora_config
self.decoding_config = vllm_config.decoding_config
self.structured_outputs_config = vllm_config.structured_outputs_config
self.tokenizer = tokenizer
self.generation_config_fields = (
@@ -219,58 +219,57 @@ class Processor:
"[lora_path]` to use the LoRA tokenizer.")
def _validate_structured_output(self, params: SamplingParams) -> None:
if not params.guided_decoding or not self.decoding_config:
if not params.structured_outputs or not self.structured_outputs_config:
return
if self.model_config.skip_tokenizer_init and params.guided_decoding:
if self.model_config.skip_tokenizer_init and params.structured_outputs:
raise ValueError(
"Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'" # noqa: E501
)
engine_level_backend = self.decoding_config.backend
if params.guided_decoding.backend:
# Request-level backend selection is not supported in V1.
backend = self.structured_outputs_config.backend
if _backend := params.structured_outputs._backend:
# Request-level backend selection is not supported.
# The values may differ if `params` is reused and was set
# to a specific backend based on `auto` behavior in a previous
# request. We remember that it was set as a result of `auto`
# using the `_auto` option set on the backend in the params.
if (params.guided_decoding.backend != engine_level_backend
and not (engine_level_backend == "auto"
and params.guided_decoding.backend_was_auto)):
# using the `_backend_was_auto` field set in the params.
if (backend != _backend
and not (backend == "auto"
and params.structured_outputs._backend_was_auto)):
raise ValueError(
"Request-level structured output backend selection is no "
"longer supported. The request specified "
f"'{params.guided_decoding.backend}', but vLLM was "
f"initialised with '{engine_level_backend}'. This error "
"can be resolved by removing backend selection from the "
"request.")
"Request-level structured output backend selection is not "
f"supported. The request specified '{_backend}', but vLLM "
f"was initialised with '{backend}'. This error can be "
"resolved by removing '_backend' from the request.")
else:
params.guided_decoding.backend = engine_level_backend
params.structured_outputs._backend = backend
# Request content validation
if (isinstance(params.guided_decoding.choice, list)
and not params.guided_decoding.choice):
if (isinstance(params.structured_outputs.choice, list)
and not params.structured_outputs.choice):
# It is invalid for choice to be an empty list
raise ValueError(f"Choice '{params.guided_decoding.choice}' "
"cannot be an empty list")
raise ValueError(
f"Choice '{params.structured_outputs.choice}' cannot be an empty list" # noqa: E501
)
if engine_level_backend.startswith("xgrammar"):
if backend.startswith("xgrammar"):
# xgrammar with no fallback
validate_xgrammar_grammar(params)
elif engine_level_backend.startswith("guidance"):
elif backend.startswith("guidance"):
# TODO: ideally we would have the LLTokenizer here as Lark syntax
# allows <|special_token|> and similar, see
# https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
# Without a tokenizer these are disallowed in grammars.
validate_guidance_grammar(params, tokenizer=None)
elif engine_level_backend == "outlines":
elif backend == "outlines":
# outlines backend
validate_structured_output_request_outlines(params)
elif engine_level_backend == "lm-format-enforcer":
elif backend == "lm-format-enforcer":
# lm format enforcer backend
validate_structured_output_request_lm_format_enforcer(params)
else:
# NOTE: engine_level_backend must be "auto" here, because we have
# NOTE: backend must be "auto" here, because we have
# checked supported_backends above.
# In this mode, we set opinionated defaults based on what we think
# will satisfy the most use cases without having to worry about
@@ -278,15 +277,15 @@ class Processor:
# other setting where a specific backend was specified.
try:
validate_xgrammar_grammar(params)
params.guided_decoding.backend = "xgrammar"
params.structured_outputs._backend = "xgrammar"
except ValueError:
# The request either failed validation
# or includes some jsonschema feature(s) that
# are not supported in xgrammar. Fall back to guidance.
validate_guidance_grammar(params, tokenizer=None)
params.guided_decoding.backend = "guidance"
params.structured_outputs._backend = "guidance"
# Remember that this backend was set automatically
params.guided_decoding.backend_was_auto = True
params.structured_outputs._backend_was_auto = True
def _maybe_build_mm_uuids(
self,