[Frontend] New allowed_token_ids decoding request parameter (#6753)

This commit is contained in:
Nick Hill
2024-07-29 16:37:27 -07:00
committed by GitHub
parent 9a7e2d0534
commit 9f69d8245a
5 changed files with 114 additions and 46 deletions

View File

@@ -134,7 +134,7 @@ class OpenAIServingChat(OpenAIServing):
request_id = f"chat-{random_uuid()}"
try:
- sampling_params = request.to_sampling_params()
+ sampling_params = request.to_sampling_params(tokenizer)
decoding_config = await self.engine.get_decoding_config()
guided_decoding_backend = request.guided_decoding_backend \
or decoding_config.guided_decoding_backend