[Frontend] New allowed_token_ids decoding request parameter (#6753)

This commit is contained in:
Nick Hill
2024-07-29 16:37:27 -07:00
committed by GitHub
parent 9a7e2d0534
commit 9f69d8245a
5 changed files with 114 additions and 46 deletions

View File

@@ -95,7 +95,7 @@ class OpenAIServingCompletion(OpenAIServing):
tokenizer = await self.engine.get_tokenizer(lora_request)
-sampling_params = request.to_sampling_params()
+sampling_params = request.to_sampling_params(tokenizer)
decoding_config = await self.engine.get_decoding_config()
guided_decoding_backend = request.guided_decoding_backend \
or decoding_config.guided_decoding_backend