[Chore] Cleanup guided namespace, move to structured outputs config (#22772)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
@@ -45,7 +45,7 @@ class Processor:
         self.model_config = vllm_config.model_config
         self.cache_config = vllm_config.cache_config
         self.lora_config = vllm_config.lora_config
-        self.decoding_config = vllm_config.decoding_config
+        self.structured_outputs_config = vllm_config.structured_outputs_config
         self.tokenizer = tokenizer

         self.generation_config_fields = (
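For reference, this first hunk is the config move itself: the processor now reads structured output settings from `vllm_config.structured_outputs_config` rather than the old `decoding_config`. A minimal sketch of the new access pattern, assuming only what this hunk shows (a `structured_outputs_config` attribute carrying a `backend` field):

    # Minimal sketch of the config move; nothing beyond the `backend`
    # field of `structured_outputs_config` appears in this hunk, so any
    # other shape of that config object is an assumption.
    def get_structured_outputs_backend(vllm_config) -> str:
        # Before this change: vllm_config.decoding_config.backend
        return vllm_config.structured_outputs_config.backend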
@@ -219,58 +219,57 @@ class Processor:
                 "[lora_path]` to use the LoRA tokenizer.")

     def _validate_structured_output(self, params: SamplingParams) -> None:
-        if not params.guided_decoding or not self.decoding_config:
+        if not params.structured_outputs or not self.structured_outputs_config:
             return

-        if self.model_config.skip_tokenizer_init and params.guided_decoding:
+        if self.model_config.skip_tokenizer_init and params.structured_outputs:
             raise ValueError(
                 "Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'"  # noqa: E501
             )

-        engine_level_backend = self.decoding_config.backend
-        if params.guided_decoding.backend:
-            # Request-level backend selection is not supported in V1.
+        backend = self.structured_outputs_config.backend
+        if _backend := params.structured_outputs._backend:
+            # Request-level backend selection is not supported.
             # The values may differ if `params` is reused and was set
             # to a specific backend based on `auto` behavior in a previous
             # request. We remember that it was set as a result of `auto`
-            # using the `_auto` option set on the backend in the params.
-            if (params.guided_decoding.backend != engine_level_backend
-                    and not (engine_level_backend == "auto"
-                             and params.guided_decoding.backend_was_auto)):
+            # using the `_backend_was_auto` field set in the params.
+            if (backend != _backend
+                    and not (backend == "auto"
+                             and params.structured_outputs._backend_was_auto)):
                 raise ValueError(
-                    "Request-level structured output backend selection is no "
-                    "longer supported. The request specified "
-                    f"'{params.guided_decoding.backend}', but vLLM was "
-                    f"initialised with '{engine_level_backend}'. This error "
-                    "can be resolved by removing backend selection from the "
-                    "request.")
+                    "Request-level structured output backend selection is not "
+                    f"supported. The request specified '{_backend}', but vLLM "
+                    f"was initialised with '{backend}'. This error can be "
+                    "resolved by removing '_backend' from the request.")
         else:
-            params.guided_decoding.backend = engine_level_backend
+            params.structured_outputs._backend = backend

         # Request content validation
-        if (isinstance(params.guided_decoding.choice, list)
-                and not params.guided_decoding.choice):
+        if (isinstance(params.structured_outputs.choice, list)
+                and not params.structured_outputs.choice):
             # It is invalid for choice to be an empty list
-            raise ValueError(f"Choice '{params.guided_decoding.choice}' "
-                             "cannot be an empty list")
+            raise ValueError(
+                f"Choice '{params.structured_outputs.choice}' cannot be an empty list"  # noqa: E501
+            )

-        if engine_level_backend.startswith("xgrammar"):
+        if backend.startswith("xgrammar"):
             # xgrammar with no fallback
             validate_xgrammar_grammar(params)
-        elif engine_level_backend.startswith("guidance"):
+        elif backend.startswith("guidance"):
             # TODO: ideally we would have the LLTokenizer here as Lark syntax
             # allows <|special_token|> and similar, see
             # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
             # Without tokenizer these are disallowed in grammars.
             validate_guidance_grammar(params, tokenizer=None)
-        elif engine_level_backend == "outlines":
+        elif backend == "outlines":
             # outlines backend
             validate_structured_output_request_outlines(params)
-        elif engine_level_backend == "lm-format-enforcer":
+        elif backend == "lm-format-enforcer":
             # lm format enforcer backend
             validate_structured_output_request_lm_format_enforcer(params)
         else:
-            # NOTE: engine_level_backend must be "auto" here, because we have
+            # NOTE: backend must be "auto" here, because we have
             # checked supported_backends above.
             # In this mode, we set opinionated defaults based on what we think
             # will satisfy the most use cases without having to worry about
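The `_backend_was_auto` flag in this hunk guards the case where a `SamplingParams` object is reused across requests: the first request under an engine-level `auto` backend writes a concrete backend back into the params, and without the flag the second request would be misread as an unsupported request-level override. A rough illustration of that flow (the `StructuredOutputsParams` constructor arguments shown are assumptions, not taken verbatim from this commit):

    # Illustration only; constructor keywords are assumed.
    schema = {"type": "object", "properties": {"name": {"type": "string"}}}
    params = SamplingParams(
        structured_outputs=StructuredOutputsParams(json=schema))

    # Request 1, engine backend "auto": validation resolves a concrete
    # backend, e.g. _backend = "xgrammar", and sets _backend_was_auto.
    #
    # Request 2 reuses `params`: _backend ("xgrammar") != engine "auto",
    # but _backend_was_auto tells the check that the mismatch came from
    # the engine's own `auto` resolution, so no ValueError is raised.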
@@ -278,15 +277,15 @@ class Processor:
             # other setting where a specific backend was specified.
             try:
                 validate_xgrammar_grammar(params)
-                params.guided_decoding.backend = "xgrammar"
+                params.structured_outputs._backend = "xgrammar"
             except ValueError:
                 # The request either failed validation
                 # or includes some jsonschema feature(s) that
                 # are not supported in xgrammar. Fall back to guidance.
                 validate_guidance_grammar(params, tokenizer=None)
-                params.guided_decoding.backend = "guidance"
+                params.structured_outputs._backend = "guidance"
                 # Remember that this backend was set automatically
-                params.guided_decoding.backend_was_auto = True
+                params.structured_outputs._backend_was_auto = True

     def _maybe_build_mm_uuids(
         self,
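Taken together, the user-facing surface after this change is `structured_outputs` on `SamplingParams` plus an engine-wide structured outputs config. A hedged end-to-end sketch follows; the `LLM` keyword `structured_outputs_config` and the dict form of the config are assumptions based on the field names in this diff, not signatures confirmed by the commit:

    # Hedged usage sketch; public signatures are assumed from the
    # identifiers in this diff, not confirmed by the commit itself.
    from vllm import LLM, SamplingParams
    from vllm.sampling_params import StructuredOutputsParams

    llm = LLM(
        model="Qwen/Qwen2.5-1.5B-Instruct",
        # Engine-level backend; "auto" triggers the xgrammar-then-guidance
        # fallback shown in the last hunk.
        structured_outputs_config={"backend": "auto"},
    )

    params = SamplingParams(
        # Must be a non-empty list; the validation above rejects [].
        structured_outputs=StructuredOutputsParams(choice=["yes", "no"]))

    outputs = llm.generate(["Is the sky blue? Answer yes or no."], params)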