[Chore] Cleanup guided namespace, move to structured outputs config (#22772)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
@@ -45,7 +45,7 @@ class Processor:
         self.model_config = vllm_config.model_config
         self.cache_config = vllm_config.cache_config
         self.lora_config = vllm_config.lora_config
-        self.decoding_config = vllm_config.decoding_config
+        self.structured_outputs_config = vllm_config.structured_outputs_config
         self.tokenizer = tokenizer

         self.generation_config_fields = (
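For reference, this first hunk is the config move itself: the processor now reads structured output settings from `vllm_config.structured_outputs_config` rather than the old `decoding_config`. A minimal sketch of the new access pattern, assuming only what this hunk shows (a `structured_outputs_config` attribute carrying a `backend` field):

    # Minimal sketch of the config move; nothing beyond the `backend`
    # field of `structured_outputs_config` appears in this hunk, so any
    # other shape of that config object is an assumption.
    def get_structured_outputs_backend(vllm_config) -> str:
        # Before this change: vllm_config.decoding_config.backend
        return vllm_config.structured_outputs_config.backend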
@@ -219,58 +219,57 @@ class Processor:
                 "[lora_path]` to use the LoRA tokenizer.")

     def _validate_structured_output(self, params: SamplingParams) -> None:
-        if not params.guided_decoding or not self.decoding_config:
+        if not params.structured_outputs or not self.structured_outputs_config:
             return

-        if self.model_config.skip_tokenizer_init and params.guided_decoding:
+        if self.model_config.skip_tokenizer_init and params.structured_outputs:
             raise ValueError(
                 "Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'"  # noqa: E501
             )

-        engine_level_backend = self.decoding_config.backend
-        if params.guided_decoding.backend:
-            # Request-level backend selection is not supported in V1.
+        backend = self.structured_outputs_config.backend
+        if _backend := params.structured_outputs._backend:
+            # Request-level backend selection is not supported.
             # The values may differ if `params` is reused and was set
             # to a specific backend based on `auto` behavior in a previous
             # request. We remember that it was set as a result of `auto`
-            # using the `_auto` option set on the backend in the params.
-            if (params.guided_decoding.backend != engine_level_backend
-                    and not (engine_level_backend == "auto"
-                             and params.guided_decoding.backend_was_auto)):
+            # using the `_backend_was_auto` field set in the params.
+            if (backend != _backend
+                    and not (backend == "auto"
+                             and params.structured_outputs._backend_was_auto)):
                 raise ValueError(
-                    "Request-level structured output backend selection is no "
-                    "longer supported. The request specified "
-                    f"'{params.guided_decoding.backend}', but vLLM was "
-                    f"initialised with '{engine_level_backend}'. This error "
-                    "can be resolved by removing backend selection from the "
-                    "request.")
+                    "Request-level structured output backend selection is not "
+                    f"supported. The request specified '{_backend}', but vLLM "
+                    f"was initialised with '{backend}'. This error can be "
+                    "resolved by removing '_backend' from the request.")
         else:
-            params.guided_decoding.backend = engine_level_backend
+            params.structured_outputs._backend = backend

         # Request content validation
-        if (isinstance(params.guided_decoding.choice, list)
-                and not params.guided_decoding.choice):
+        if (isinstance(params.structured_outputs.choice, list)
+                and not params.structured_outputs.choice):
             # It is invalid for choice to be an empty list
-            raise ValueError(f"Choice '{params.guided_decoding.choice}' "
-                             "cannot be an empty list")
+            raise ValueError(
+                f"Choice '{params.structured_outputs.choice}' cannot be an empty list"  # noqa: E501
+            )

-        if engine_level_backend.startswith("xgrammar"):
+        if backend.startswith("xgrammar"):
             # xgrammar with no fallback
             validate_xgrammar_grammar(params)
-        elif engine_level_backend.startswith("guidance"):
+        elif backend.startswith("guidance"):
             # TODO: ideally we would have the LLTokenizer here as Lark syntax
             # allows <|special_token|> and similar, see
             # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
             # Without tokenizer these are disallowed in grammars.
             validate_guidance_grammar(params, tokenizer=None)
-        elif engine_level_backend == "outlines":
+        elif backend == "outlines":
             # outlines backend
             validate_structured_output_request_outlines(params)
-        elif engine_level_backend == "lm-format-enforcer":
+        elif backend == "lm-format-enforcer":
             # lm format enforcer backend
             validate_structured_output_request_lm_format_enforcer(params)
         else:
-            # NOTE: engine_level_backend must be "auto" here, because we have
+            # NOTE: backend must be "auto" here, because we have
             # checked supported_backends above.
             # In this mode, we set opinionated defaults based on what we think
             # will satisfy the most use cases without having to worry about
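The `_backend_was_auto` flag in this hunk guards the case where a `SamplingParams` object is reused across requests: the first request under an engine-level `auto` backend writes a concrete backend back into the params, and without the flag the second request would be misread as an unsupported request-level override. A rough illustration of that flow (the `StructuredOutputsParams` constructor arguments shown are assumptions, not taken verbatim from this commit):

    # Illustration only; constructor keywords are assumed.
    schema = {"type": "object", "properties": {"name": {"type": "string"}}}
    params = SamplingParams(
        structured_outputs=StructuredOutputsParams(json=schema))

    # Request 1, engine backend "auto": validation resolves a concrete
    # backend, e.g. _backend = "xgrammar", and sets _backend_was_auto.
    #
    # Request 2 reuses `params`: _backend ("xgrammar") != engine "auto",
    # but _backend_was_auto tells the check that the mismatch came from
    # the engine's own `auto` resolution, so no ValueError is raised.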
@@ -278,15 +277,15 @@ class Processor:
             # other setting where a specific backend was specified.
             try:
                 validate_xgrammar_grammar(params)
-                params.guided_decoding.backend = "xgrammar"
+                params.structured_outputs._backend = "xgrammar"
             except ValueError:
                 # The request either failed validation
                 # or includes some jsonschema feature(s) that
                 # are not supported in xgrammar. Fall back to guidance.
                 validate_guidance_grammar(params, tokenizer=None)
-                params.guided_decoding.backend = "guidance"
+                params.structured_outputs._backend = "guidance"
                 # Remember that this backend was set automatically
-                params.guided_decoding.backend_was_auto = True
+                params.structured_outputs._backend_was_auto = True

     def _maybe_build_mm_uuids(
         self,
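Taken together, the user-facing surface after this change is `structured_outputs` on `SamplingParams` plus an engine-wide structured outputs config. A hedged end-to-end sketch follows; the `LLM` keyword `structured_outputs_config` and the dict form of the config are assumptions based on the field names in this diff, not signatures confirmed by the commit:

    # Hedged usage sketch; public signatures are assumed from the
    # identifiers in this diff, not confirmed by the commit itself.
    from vllm import LLM, SamplingParams
    from vllm.sampling_params import StructuredOutputsParams

    llm = LLM(
        model="Qwen/Qwen2.5-1.5B-Instruct",
        # Engine-level backend; "auto" triggers the xgrammar-then-guidance
        # fallback shown in the last hunk.
        structured_outputs_config={"backend": "auto"},
    )

    params = SamplingParams(
        # Must be a non-empty list; the validation above rejects [].
        structured_outputs=StructuredOutputsParams(choice=["yes", "no"]))

    outputs = llm.generate(["Is the sky blue? Answer yes or no."], params)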