diff --git a/vllm/entrypoints/serve/tokenize/protocol.py b/vllm/entrypoints/serve/tokenize/protocol.py
index 66a85a8b6..49d737ef0 100644
--- a/vllm/entrypoints/serve/tokenize/protocol.py
+++ b/vllm/entrypoints/serve/tokenize/protocol.py
@@ -92,11 +92,11 @@ class TokenizeChatRequest(OpenAIBaseModel):
     )
     mm_processor_kwargs: dict[str, Any] | None = Field(
         default=None,
-        description=("Additional kwargs to pass to the HF processor."),
+        description="Additional kwargs to pass to the HF processor.",
     )
     tools: list[ChatCompletionToolsParam] | None = Field(
         default=None,
-        description=("A list of tools the model may call."),
+        description="A list of tools the model may call.",
     )
 
     @model_validator(mode="before")
diff --git a/vllm/entrypoints/serve/tokenize/serving.py b/vllm/entrypoints/serve/tokenize/serving.py
index c80009eaa..a9f375163 100644
--- a/vllm/entrypoints/serve/tokenize/serving.py
+++ b/vllm/entrypoints/serve/tokenize/serving.py
@@ -62,7 +62,7 @@ class OpenAIServingTokenization(OpenAIServing):
         if error_check_ret is not None:
             return error_check_ret
 
-        request_id = f"tokn-{self._base_request_id(raw_request)}"
+        request_id = f"tokenize-{self._base_request_id(raw_request)}"
 
         try:
             lora_request = self._maybe_get_adapters(request)
@@ -134,7 +134,7 @@ class OpenAIServingTokenization(OpenAIServing):
         if error_check_ret is not None:
            return error_check_ret
 
-        request_id = f"tokn-{self._base_request_id(raw_request)}"
+        request_id = f"tokenize-{self._base_request_id(raw_request)}"
 
        lora_request = self._maybe_get_adapters(request)