[Bugfix]: v1 engine - consider lora adapters in allowed_token_ids (#17855)
Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
@@ -74,6 +74,7 @@ class Processor:
|
||||
def _validate_sampling_params(
|
||||
self,
|
||||
params: SamplingParams,
|
||||
lora_request: Optional[LoRARequest],
|
||||
) -> None:
|
||||
self._validate_structured_output(params)
|
||||
self._validate_logit_bias(params)
|
||||
@@ -82,7 +83,8 @@ class Processor:
|
||||
return
|
||||
if not params.allowed_token_ids:
|
||||
raise ValueError("allowed_token_ids is not None and empty!")
|
||||
vocab_size = self.model_config.get_vocab_size()
|
||||
tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
|
||||
vocab_size = len(tokenizer)
|
||||
if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
|
||||
raise ValueError(
|
||||
"allowed_token_ids contains out-of-vocab token id!")
|
||||
@@ -122,6 +124,7 @@ class Processor:
|
||||
def _validate_params(
|
||||
self,
|
||||
params: Union[SamplingParams, PoolingParams],
|
||||
lora_request: Optional[LoRARequest],
|
||||
):
|
||||
"""
|
||||
Validate supported SamplingParam.
|
||||
@@ -132,7 +135,7 @@ class Processor:
|
||||
raise ValueError("V1 does not yet support Pooling models.")
|
||||
|
||||
self._validate_logprobs(params)
|
||||
self._validate_sampling_params(params)
|
||||
self._validate_sampling_params(params, lora_request)
|
||||
self._validate_supported_sampling_params(params)
|
||||
|
||||
def _validate_lora(self, lora_request: Optional[LoRARequest]) -> None:
|
||||
@@ -207,7 +210,7 @@ class Processor:
|
||||
# TODO(woosuk): Support pooling models.
|
||||
# TODO(woosuk): Support encoder-decoder models.
|
||||
self._validate_lora(lora_request)
|
||||
self._validate_params(params)
|
||||
self._validate_params(params, lora_request)
|
||||
if priority != 0:
|
||||
raise ValueError("V1 does not support priority yet.")
|
||||
if trace_headers is not None:
|
||||
|
||||
Reference in New Issue
Block a user