From ced2a92f40ed56148a6f4496239b55a65f854081 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Thu, 12 Feb 2026 11:33:15 +0800
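Subject: [PATCH] [Refactor] Move validation to params definitions (#34362)

Move the per-request SamplingParams checks (logprobs, logit_bias,
allowed_token_ids, speculative decoding, structured outputs) out of
InputProcessor and into a new SamplingParams.verify(), mirroring the
existing PoolingParams.verify(). Behavior changes that come with the
move: allowed_token_ids failures now raise VLLMValidationError instead
of ValueError, the structured-outputs tokenizer check tests for a
missing tokenizer rather than `skip_tokenizer_init`, the
logits_processors rejection is raised from SamplingParams itself,
InputProcessor._validate_params raises TypeError for any other params
type, and PoolingParams' default-parameter helpers no longer accept
`model_config=None`.

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>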
---
 vllm/pooling_params.py            |  17 +-
 vllm/sampling_params.py           | 238 ++++++++++++++++++++++++++++
 vllm/v1/engine/input_processor.py | 254 +++---------------------------
 3 files changed, 264 insertions(+), 245 deletions(-)

diff --git a/vllm/pooling_params.py b/vllm/pooling_params.py
index 2251cceef..75d441d74 100644
--- a/vllm/pooling_params.py
+++ b/vllm/pooling_params.py
@@ -72,7 +72,7 @@ class PoolingParams(
         """Returns a deep copy of the PoolingParams instance."""
         return deepcopy(self)
 
-    def verify(self, model_config: "ModelConfig") -> None:
+    def verify(self, model_config: ModelConfig) -> None:
         # plugin task uses io_processor.parse_request to verify inputs,
         # skipping PoolingParams verify
         if self.task == "plugin":
@@ -87,12 +87,7 @@ class PoolingParams(
         self._set_default_parameters(model_config)
         self._verify_valid_parameters()
 
-    def _merge_default_parameters(
-        self, model_config: "ModelConfig | None" = None
-    ) -> None:
-        if model_config is None:
-            return
-
+    def _merge_default_parameters(self, model_config: ModelConfig) -> None:
         pooler_config = model_config.pooler_config
         if pooler_config is None:
             return
@@ -119,7 +114,9 @@ class PoolingParams(
         self._verify_step_pooling(pooler_config, valid_parameters)
 
     def _verify_step_pooling(
-        self, pooler_config: "PoolerConfig", valid_parameters: list[str]
+        self,
+        pooler_config: PoolerConfig,
+        valid_parameters: list[str],
     ):
         step_pooling_parameters = ["step_tag_id", "returned_token_ids"]
         if pooler_config.tok_pooling_type != "STEP":
@@ -142,12 +139,12 @@ class PoolingParams(
                 if getattr(self, k, None) is None:
                     setattr(self, k, getattr(pooler_config, k))
 
-    def _set_default_parameters(self, model_config: "ModelConfig | None"):
+    def _set_default_parameters(self, model_config: ModelConfig):
         if self.task in ["embed", "token_embed"]:
             if self.use_activation is None:
                 self.use_activation = True
 
-            if self.dimensions is not None and model_config is not None:
+            if self.dimensions is not None:
                 if not model_config.is_matryoshka:
                     raise ValueError(
                         f'Model "{model_config.served_model_name}" does not '
diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 1d097852e..dd354190f 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -3,6 +3,7 @@
 """Sampling parameters for text generation."""
 
 import copy
+import json
 from dataclasses import field
 from enum import Enum, IntEnum
 from functools import cached_property
@@ -11,6 +12,7 @@ from typing import Annotated, Any
 import msgspec
 from pydantic.dataclasses import dataclass
 
+from vllm.config import ModelConfig, SpeculativeConfig, StructuredOutputsConfig
 from vllm.exceptions import VLLMValidationError
 from vllm.logger import init_logger
 from vllm.logits_process import LogitsProcessor
@@ -453,6 +455,11 @@ class SamplingParams(
                 parameter="prompt_logprobs",
                 value=self.prompt_logprobs,
             )
+        if self.logits_processors:
+            # TODO: Remove `logits_processors` attribute
+            raise ValueError(
+                "vLLM V1 does not support per request user-provided logits processors."
+            )
         if self.truncate_prompt_tokens is not None and (
             self.truncate_prompt_tokens == 0 or self.truncate_prompt_tokens < -1
         ):
@@ -589,6 +596,237 @@ class SamplingParams(
         )
         return copy.deepcopy(self, memo=logit_processor_refs)
 
+    def verify(
+        self,
+        model_config: ModelConfig,
+        speculative_config: SpeculativeConfig | None,
+        structured_outputs_config: StructuredOutputsConfig | None,
+        tokenizer: TokenizerLike | None,
+    ) -> None:
+        self._validate_logprobs(model_config)
+        self._validate_logit_bias(model_config)
+        self._validate_allowed_token_ids(tokenizer)
+        self._validate_spec_decode(speculative_config)
+        self._validate_structured_outputs(structured_outputs_config, tokenizer)
+
+    def _validate_logprobs(self, model_config: ModelConfig) -> None:
+        max_logprobs = model_config.max_logprobs
+        if max_logprobs == -1:
+            max_logprobs = model_config.get_vocab_size()
+
+        # Validate sample logprobs.
+        if num_logprobs := self.logprobs:
+            if num_logprobs == -1:
+                num_logprobs = model_config.get_vocab_size()
+            if num_logprobs > max_logprobs:
+                raise VLLMValidationError(
+                    f"Requested sample logprobs of {num_logprobs}, "
+                    f"which is greater than max allowed: {max_logprobs}",
+                    parameter="logprobs",
+                    value=num_logprobs,
+                )
+
+        # Validate prompt logprobs.
+        if num_prompt_logprobs := self.prompt_logprobs:
+            if num_prompt_logprobs == -1:
+                num_prompt_logprobs = model_config.get_vocab_size()
+            if num_prompt_logprobs > max_logprobs:
+                raise VLLMValidationError(
+                    f"Requested prompt logprobs of {num_prompt_logprobs}, "
+                    f"which is greater than max allowed: {max_logprobs}",
+                    parameter="prompt_logprobs",
+                    value=num_prompt_logprobs,
+                )
+
+    def _validate_logit_bias(self, model_config: ModelConfig) -> None:
+        """Validate logit_bias token IDs are within vocabulary range."""
+        if not self.logit_bias:
+            return
+
+        vocab_size = model_config.get_vocab_size()
+        invalid_token_ids = [
+            token_id
+            for token_id in self.logit_bias
+            if token_id < 0 or token_id >= vocab_size
+        ]
+
+        if invalid_token_ids:
+            raise VLLMValidationError(
+                f"token_id(s) {invalid_token_ids} in logit_bias contain "
+                f"out-of-vocab token ids. Vocabulary size: {vocab_size}",
+                parameter="logit_bias",
+                value=invalid_token_ids,
+            )
+
+    def _validate_allowed_token_ids(self, tokenizer: TokenizerLike | None) -> None:
+        allowed_token_ids = self.allowed_token_ids
+        if allowed_token_ids is None:
+            return
+
+        if len(allowed_token_ids) == 0:
+            raise VLLMValidationError(
+                "allowed_token_ids is not None and empty!",
+                parameter="allowed_token_ids",
+                value=allowed_token_ids,
+            )
+
+        if tokenizer is not None:
+            vocab_size = len(tokenizer)
+            invalid_token_ids = [
+                token_id
+                for token_id in allowed_token_ids
+                if token_id < 0 or token_id >= vocab_size
+            ]
+            if invalid_token_ids:
+                raise VLLMValidationError(
+                    "allowed_token_ids contains out-of-vocab token id!",
+                    parameter="allowed_token_ids",
+                    value=invalid_token_ids,
+                )
+
+    def _validate_spec_decode(
+        self,
+        speculative_config: SpeculativeConfig | None,
+    ) -> None:
+        if speculative_config is None:
+            return
+
+        # Some sampling parameters are not yet compatible with spec decoding.
+        if self.min_tokens > 1 or self.min_p > _SAMPLING_EPS or self.logit_bias:
+            raise ValueError(
+                "The min_tokens, min_p, and logit_bias sampling parameters "
+                "are not yet supported with speculative decoding."
+            )
+
+    def _validate_structured_outputs(
+        self,
+        structured_outputs_config: StructuredOutputsConfig | None,
+        tokenizer: TokenizerLike | None,
+    ) -> None:
+        if structured_outputs_config is None or self.structured_outputs is None:
+            return
+
+        if tokenizer is None:
+            raise ValueError(
+                "Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'"  # noqa: E501
+            )
+
+        backend = structured_outputs_config.backend
+        if _backend := self.structured_outputs._backend:
+            # Request-level backend selection is not supported.
+            # The values may differ if this instance is reused and was set
+            # to a specific backend based on `auto` behavior in a previous
+            # request. We remember that it was set as a result of `auto`
+            # using the `_backend_was_auto` field of `structured_outputs`.
+            if backend != _backend and not (
+                backend == "auto" and self.structured_outputs._backend_was_auto
+            ):
+                raise ValueError(
+                    "Request-level structured output backend selection is not "
+                    f"supported. The request specified '{_backend}', but vLLM "
+                    f"was initialised with '{backend}'. This error can be "
+                    "resolved by removing '_backend' from the request."
+                )
+        else:
+            self.structured_outputs._backend = backend
+
+        # Request content validation
+        if (
+            isinstance(self.structured_outputs.choice, list)
+            and not self.structured_outputs.choice
+        ):
+            # It is invalid for choice to be an empty list
+            raise ValueError(
+                f"Choice '{self.structured_outputs.choice}' cannot be an empty list"  # noqa: E501
+            )
+        # Reject empty string grammar early to avoid engine-side crashes
+        if (
+            isinstance(self.structured_outputs.grammar, str)
+            and self.structured_outputs.grammar.strip() == ""
+        ):
+            raise ValueError("structured_outputs.grammar cannot be an empty string")
+
+        from vllm.tokenizers.mistral import MistralTokenizer
+        from vllm.v1.structured_output.backend_guidance import (
+            has_guidance_unsupported_json_features,
+            validate_guidance_grammar,
+        )
+        from vllm.v1.structured_output.backend_lm_format_enforcer import (
+            validate_structured_output_request_lm_format_enforcer,
+        )
+        from vllm.v1.structured_output.backend_outlines import (
+            validate_structured_output_request_outlines,
+        )
+        from vllm.v1.structured_output.backend_xgrammar import validate_xgrammar_grammar
+
+        if backend.startswith("xgrammar"):
+            # xgrammar with no fallback
+            validate_xgrammar_grammar(self)
+        elif backend.startswith("guidance"):
+            # TODO: ideally we would have the LLTokenizer here as Lark syntax
+            # allows <|special_token|> and similar, see
+            # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
+            # Without tokenizer these are disallowed in grammars.
+            if isinstance(tokenizer, MistralTokenizer):
+                raise ValueError(
+                    "Mistral tokenizer is not supported for the 'guidance' "
+                    "structured output backend. Please use ['xgrammar', 'outlines'] "
+                    "backends or tokenizer_mode='hf' instead."
+                )
+            validate_guidance_grammar(self, tokenizer=None)
+        elif backend == "outlines":
+            # outlines backend
+            validate_structured_output_request_outlines(self)
+        elif backend == "lm-format-enforcer":
+            # lm format enforcer backend
+            if isinstance(tokenizer, MistralTokenizer):
+                raise ValueError(
+                    "Mistral tokenizer is not supported for the 'lm-format-enforcer' "
+                    "structured output backend. Please use ['xgrammar', 'outlines'] "
+                    "backends or tokenizer_mode='hf' instead."
+                )
+            validate_structured_output_request_lm_format_enforcer(self)
+        else:
+            # NOTE: backend is assumed to be "auto" here, since none of the
+            # backend-specific branches above matched.
+            # In this mode, we set opinionated defaults based on what we think
+            # will satisfy the most use cases without having to worry about
+            # this setting. We include fallback behavior here, but not with any
+            # other setting where a specific backend was specified.
+            try:
+                validate_xgrammar_grammar(self)
+                self.structured_outputs._backend = "xgrammar"
+            except ValueError:
+                # The request either failed validation
+                # or includes some jsonschema feature(s) that
+                # are not supported in xgrammar.
+
+                # Check if schema has features unsupported by guidance
+                so_params = self.structured_outputs
+                skip_guidance = False
+                if so_params.json:
+                    if isinstance(so_params.json, str):
+                        schema = json.loads(so_params.json)
+                    else:
+                        schema = so_params.json
+                    skip_guidance = has_guidance_unsupported_json_features(schema)
+
+                if isinstance(tokenizer, MistralTokenizer) or skip_guidance:
+                    # Fall back to outlines if the tokenizer is Mistral
+                    # or if schema contains features unsupported by guidance
+                    validate_structured_output_request_outlines(self)
+                    self.structured_outputs._backend = "outlines"
+                else:
+                    # Fall back to guidance by default.
+                    validate_guidance_grammar(self, tokenizer=None)
+                    self.structured_outputs._backend = "guidance"
+            # Remember that this backend was set automatically
+            self.structured_outputs._backend_was_auto = True
+
+        # Run post-init validation. This is also important to ensure subsequent
+        # roundtrip serialization/deserialization won't fail.
+        self.structured_outputs.__post_init__()
+
     def __repr__(self) -> str:
         return (
             f"SamplingParams(n={self.n}, "
diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py
index 0e52e2d20..17f4c6dec 100644
--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -6,7 +6,6 @@ from collections.abc import Mapping
 from typing import Any, Literal, cast
 
 from vllm.config import VllmConfig
-from vllm.exceptions import VLLMValidationError
 from vllm.inputs.data import (
     ProcessorInputs,
     PromptType,
@@ -30,25 +29,13 @@ from vllm.multimodal.utils import argsort_mm_positions
 from vllm.pooling_params import PoolingParams
 from vllm.renderers import BaseRenderer
 from vllm.renderers.inputs import DictPrompt, TokPrompt
-from vllm.sampling_params import _SAMPLING_EPS, SamplingParams
+from vllm.sampling_params import SamplingParams
 from vllm.tasks import POOLING_TASKS, SupportedTask
 from vllm.tokenizers import TokenizerLike
-from vllm.tokenizers.mistral import MistralTokenizer
 from vllm.utils import length_from_prompt_token_ids_or_embeds, random_uuid
 from vllm.utils.torch_utils import set_default_torch_num_threads
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.metrics.stats import MultiModalCacheStats
-from vllm.v1.structured_output.backend_guidance import (
-    has_guidance_unsupported_json_features,
-    validate_guidance_grammar,
-)
-from vllm.v1.structured_output.backend_lm_format_enforcer import (
-    validate_structured_output_request_lm_format_enforcer,
-)
-from vllm.v1.structured_output.backend_outlines import (
-    validate_structured_output_request_outlines,
-)
-from vllm.v1.structured_output.backend_xgrammar import validate_xgrammar_grammar
 
 logger = init_logger(__name__)
 
@@ -64,6 +51,7 @@ class InputProcessor:
         self.cache_config = vllm_config.cache_config
         self.lora_config = vllm_config.lora_config
         self.scheduler_config = vllm_config.scheduler_config
+        self.speculative_config = vllm_config.speculative_config
         self.structured_outputs_config = vllm_config.structured_outputs_config
         self.observability_config = vllm_config.observability_config
 
@@ -101,101 +89,6 @@ class InputProcessor:
     def renderer(self) -> BaseRenderer:
         return self.input_preprocessor.renderer
 
-    def _validate_logprobs(
-        self,
-        params: SamplingParams,
-    ) -> None:
-        max_logprobs = self.model_config.max_logprobs
-        if max_logprobs == -1:
-            max_logprobs = self.model_config.get_vocab_size()
-
-        # Validate sample logprobs.
-        if params.logprobs:
-            num_logprobs = params.logprobs
-            if num_logprobs == -1:
-                num_logprobs = self.model_config.get_vocab_size()
-            if num_logprobs > max_logprobs:
-                raise VLLMValidationError(
-                    f"Requested sample logprobs of {num_logprobs}, "
-                    f"which is greater than max allowed: {max_logprobs}",
-                    parameter="logprobs",
-                    value=num_logprobs,
-                )
-
-        # Validate prompt logprobs.
-        if params.prompt_logprobs:
-            num_prompt_logprobs = params.prompt_logprobs
-            if num_prompt_logprobs == -1:
-                num_prompt_logprobs = self.model_config.get_vocab_size()
-            if num_prompt_logprobs > max_logprobs:
-                raise VLLMValidationError(
-                    f"Requested prompt logprobs of {num_prompt_logprobs}, "
-                    f"which is greater than max allowed: {max_logprobs}",
-                    parameter="prompt_logprobs",
-                    value=num_prompt_logprobs,
-                )
-
-    def _validate_sampling_params(
-        self,
-        params: SamplingParams,
-    ) -> None:
-        self._validate_structured_output(params)
-        self._validate_logit_bias(params)
-
-        if params.allowed_token_ids is None:
-            return
-        if not params.allowed_token_ids:
-            raise ValueError("allowed_token_ids is not None and empty!")
-        if self.tokenizer is None:
-            # When skip_tokenizer_init=True, we can't validate token IDs
-            # Skip validation and let the model handle invalid tokens
-            return
-        vocab_size = len(self.tokenizer)
-        if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
-            raise ValueError("allowed_token_ids contains out-of-vocab token id!")
-
-    def _validate_logit_bias(
-        self,
-        params: SamplingParams,
-    ) -> None:
-        """Validate logit_bias token IDs are within vocabulary range."""
-        if not params.logit_bias:
-            return
-
-        vocab_size = self.model_config.get_vocab_size()
-        invalid_token_ids = []
-
-        for token_id in params.logit_bias:
-            if token_id < 0 or token_id >= vocab_size:
-                invalid_token_ids.append(token_id)
-
-        if invalid_token_ids:
-            raise VLLMValidationError(
-                f"token_id(s) {invalid_token_ids} in logit_bias contain "
-                f"out-of-vocab token ids. Vocabulary size: {vocab_size}",
-                parameter="logit_bias",
-                value=invalid_token_ids,
-            )
-
-    def _validate_supported_sampling_params(
-        self,
-        params: SamplingParams,
-    ) -> None:
-        # Logits processors not supported.
-        if params.logits_processors:
-            raise ValueError(
-                "vLLM V1 does not support per request user-provided logits processors."
-            )
-
-        # Some sampling parameters are not yet compatible with spec decoding.
-        if self.vllm_config.speculative_config is not None and (
-            params.min_tokens > 1 or params.min_p > _SAMPLING_EPS or params.logit_bias
-        ):
-            raise ValueError(
-                "The min_tokens, min_p, and logit_bias sampling parameters "
-                "are not yet supported with speculative decoding."
-            )
-
     def _validate_params(
         self,
         params: SamplingParams | PoolingParams,
@@ -203,11 +96,15 @@ class InputProcessor:
         # is passed to all `process_inputs` calls
         supported_tasks: tuple[SupportedTask, ...] | None,
     ):
-        """
-        Validate supported SamplingParam.
-        Should raise ValueError if unsupported for API Server.
-        """
-        if isinstance(params, PoolingParams):
+        """Raise `ValueError` if SamplingParams or PoolingParams is not valid."""
+        if isinstance(params, SamplingParams):
+            params.verify(
+                self.model_config,
+                self.speculative_config,
+                self.structured_outputs_config,
+                self.tokenizer,
+            )
+        elif isinstance(params, PoolingParams):
             if supported_tasks is None:
                 raise RuntimeError("`supported_tasks` must be passed for pooling")
 
@@ -233,12 +130,11 @@ class InputProcessor:
                 )
 
             params.verify(self.model_config)
-
-            return
-
-        self._validate_logprobs(params)
-        self._validate_sampling_params(params)
-        self._validate_supported_sampling_params(params)
+        else:
+            raise TypeError(
+                f"params must be either SamplingParams or PoolingParams, "
+                f"but got {type(params).__name__}"
+            )
 
     def _parse_mm_items(self, mm_data: MultiModalDataDict) -> MultiModalDataItems:
         mm_processor = self.input_preprocessor._get_mm_processor()
@@ -334,120 +230,6 @@ class InputProcessor:
                 "[lora_path]` to use the LoRA tokenizer."
             )
 
-    def _validate_structured_output(self, params: SamplingParams) -> None:
-        if not params.structured_outputs or not self.structured_outputs_config:
-            return
-
-        if self.model_config.skip_tokenizer_init and params.structured_outputs:
-            raise ValueError(
-                "Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'"  # noqa: E501
-            )
-
-        backend = self.structured_outputs_config.backend
-        if _backend := params.structured_outputs._backend:
-            # Request-level backend selection is not supported.
-            # The values may differ if `params` is reused and was set
-            # to a specific backend based on `auto` behavior in a previous
-            # request. We remember that it was set as a result of `auto`
-            # using the `_backend_was_auto` field set in the params.
-            if backend != _backend and not (
-                backend == "auto" and params.structured_outputs._backend_was_auto
-            ):
-                raise ValueError(
-                    "Request-level structured output backend selection is not "
-                    f"supported. The request specified '{_backend}', but vLLM "
-                    f"was initialised with '{backend}'. This error can be "
-                    "resolved by removing '_backend' from the request."
-                )
-        else:
-            params.structured_outputs._backend = backend
-
-        # Request content validation
-        if (
-            isinstance(params.structured_outputs.choice, list)
-            and not params.structured_outputs.choice
-        ):
-            # It is invalid for choice to be an empty list
-            raise ValueError(
-                f"Choice '{params.structured_outputs.choice}' cannot be an empty list"  # noqa: E501
-            )
-        # Reject empty string grammar early to avoid engine-side crashes
-        if (
-            isinstance(params.structured_outputs.grammar, str)
-            and params.structured_outputs.grammar.strip() == ""
-        ):
-            raise ValueError("structured_outputs.grammar cannot be an empty string")
-
-        if backend.startswith("xgrammar"):
-            # xgrammar with no fallback
-            validate_xgrammar_grammar(params)
-        elif backend.startswith("guidance"):
-            # TODO: ideally we would have the LLTokenizer here as Lark syntax
-            # allows <|special_token|> and similar, see
-            # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
-            # Without tokenizer these are disallowed in grammars.
-            if isinstance(self.tokenizer, MistralTokenizer):
-                raise ValueError(
-                    "Mistral tokenizer is not supported for the 'guidance' "
-                    "structured output backend. Please use ['xgrammar', 'outlines'] "
-                    "backends or tokenizer_mode='hf' instead."
-                )
-            validate_guidance_grammar(params, tokenizer=None)
-        elif backend == "outlines":
-            # outlines backend
-            validate_structured_output_request_outlines(params)
-        elif backend == "lm-format-enforcer":
-            # lm format enforcer backend
-            if isinstance(self.tokenizer, MistralTokenizer):
-                raise ValueError(
-                    "Mistral tokenizer is not supported for the 'lm-format-enforcer' "
-                    "structured output backend. Please use ['xgrammar', 'outlines'] "
-                    "backends or tokenizer_mode='hf' instead."
-                )
-            validate_structured_output_request_lm_format_enforcer(params)
-        else:
-            # NOTE: backend must be "auto" here, because we have
-            # checked supported_backends above.
-            # In this mode, we set opinionated defaults based on what we think
-            # will satisfy the most use cases without having to worry about
-            # this setting. We include fallback behavior here, but not with any
-            # other setting where a specific backend was specified.
-            try:
-                validate_xgrammar_grammar(params)
-                params.structured_outputs._backend = "xgrammar"
-            except ValueError:
-                # The request either failed validation
-                # or includes some jsonschema feature(s) that
-                # are not supported in xgrammar.
-
-                # Check if schema has features unsupported by guidance
-                so_params = params.structured_outputs
-                skip_guidance = False
-                if so_params.json:
-                    if isinstance(so_params.json, str):
-                        import json
-
-                        schema = json.loads(so_params.json)
-                    else:
-                        schema = so_params.json
-                    skip_guidance = has_guidance_unsupported_json_features(schema)
-
-                if isinstance(self.tokenizer, MistralTokenizer) or skip_guidance:
-                    # Fall back to outlines if the tokenizer is Mistral
-                    # or if schema contains features unsupported by guidance
-                    validate_structured_output_request_outlines(params)
-                    params.structured_outputs._backend = "outlines"
-                else:
-                    # Fall back to guidance by default.
-                    validate_guidance_grammar(params, tokenizer=None)
-                    params.structured_outputs._backend = "guidance"
-            # Remember that this backend was set automatically
-            params.structured_outputs._backend_was_auto = True
-
-        # Run post-init validation. This is also important to ensure subsequent
-        # roundtrip serialization/deserialization won't fail.
-        params.structured_outputs.__post_init__()
-
     def _extract_singleton_mm_data(
         self, prompt: SingletonPrompt
     ) -> MultiModalDataDict | None:
@@ -618,8 +400,10 @@ class InputProcessor:
                     prompt_token_ids, prompt_embeds
                 )
                 sampling_params.max_tokens = self.model_config.max_model_len - seq_len
+
             sampling_params.update_from_generation_config(
-                self.generation_config_fields, eos_token_id
+                self.generation_config_fields,
+                None if self.tokenizer is None else self.tokenizer.eos_token_id,
             )
             if self.tokenizer is not None:
                 sampling_params.update_from_tokenizer(self.tokenizer)