[Frontend] Add backend-specific options for guided decoding (#13505)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
Joe Runde
2025-02-20 13:07:58 -07:00
committed by GitHub
parent 6a417b8600
commit bfbc0b32c6
8 changed files with 123 additions and 42 deletions

View File

@@ -25,6 +25,7 @@ from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
get_quantization_config)
from vllm.model_executor.models import ModelRegistry
from vllm.platforms import CpuArchEnum
from vllm.sampling_params import GuidedDecodingParams
from vllm.tracing import is_otel_available, otel_import_error_traceback
from vllm.transformers_utils.config import (
ConfigFormat, get_config, get_hf_image_processor_config,
@@ -2631,7 +2632,9 @@ class DecodingConfig:
def __post_init__(self):
valid_guided_backends = ['outlines', 'lm-format-enforcer', 'xgrammar']
backend = self.guided_decoding_backend
backend = GuidedDecodingParams(
backend=self.guided_decoding_backend).backend_name
if backend not in valid_guided_backends:
raise ValueError(f"Invalid guided_decoding_backend '{backend},"
f"must be one of {valid_guided_backends}")