[Bugfix]: v1 engine - consider lora adapters in allowed_token_ids (#17855)

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning
2025-05-11 03:53:58 -04:00
committed by GitHub
parent eea22a56ab
commit 8132365b74
3 changed files with 154 additions and 5 deletions

View File

@@ -74,6 +74,7 @@ class Processor:
     def _validate_sampling_params(
         self,
         params: SamplingParams,
+        lora_request: Optional[LoRARequest],
     ) -> None:
         self._validate_structured_output(params)
         self._validate_logit_bias(params)
@@ -82,7 +83,8 @@ class Processor:
             return
         if not params.allowed_token_ids:
             raise ValueError("allowed_token_ids is not None and empty!")
-        vocab_size = self.model_config.get_vocab_size()
+        tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
+        vocab_size = len(tokenizer)
         if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
             raise ValueError(
                 "allowed_token_ids contains out-of-vocab token id!")
@@ -122,6 +124,7 @@ class Processor:
     def _validate_params(
         self,
         params: Union[SamplingParams, PoolingParams],
+        lora_request: Optional[LoRARequest],
     ):
         """
         Validate supported SamplingParam.
@@ -132,7 +135,7 @@ class Processor:
             raise ValueError("V1 does not yet support Pooling models.")
         self._validate_logprobs(params)
-        self._validate_sampling_params(params)
+        self._validate_sampling_params(params, lora_request)
         self._validate_supported_sampling_params(params)

     def _validate_lora(self, lora_request: Optional[LoRARequest]) -> None:
@@ -207,7 +210,7 @@ class Processor:
         # TODO(woosuk): Support pooling models.
         # TODO(woosuk): Support encoder-decoder models.
         self._validate_lora(lora_request)
-        self._validate_params(params)
+        self._validate_params(params, lora_request)
         if priority != 0:
             raise ValueError("V1 does not support priority yet.")
         if trace_headers is not None: