Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
commit d6953beb91 (parent 17edd8a807)
Author: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date:   2025-10-05 15:06:22 +01:00, committed via GitHub
1508 changed files with 115244 additions and 94146 deletions

vllm/entrypoints/openai/serving_tokenization.py

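The change swaps two tools for one: ruff's formatter takes over from yapf, and ruff's "I" (isort) lint rules take over import sorting. As a minimal sketch of how the two replacement passes can be run together locally (the script name is hypothetical and it assumes ruff is installed; it is not part of this commit):

# check_format.py (illustrative sketch, not part of this commit).
# Runs the two ruff passes that together replace yapf + isort.
import subprocess
import sys

checks = [
    ["ruff", "format", "--check", "."],       # formatting (replaces yapf)
    ["ruff", "check", "--select", "I", "."],  # import sorting (replaces isort)
]

failed = False
for cmd in checks:
    print("$", " ".join(cmd))
    # A non-zero return code means the tree is not clean for that pass.
    if subprocess.run(cmd).returncode != 0:
        failed = True

sys.exit(1 if failed else 0)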
@@ -10,15 +10,19 @@ from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.logger import RequestLogger
-# yapf conflicts with isort for this block
-# yapf: disable
-from vllm.entrypoints.openai.protocol import (DetokenizeRequest,
-                                              DetokenizeResponse,
-                                              ErrorResponse,
-                                              TokenizeChatRequest,
-                                              TokenizeRequest,
-                                              TokenizeResponse,
-                                              TokenizerInfoResponse)
-# yapf: enable
+from vllm.entrypoints.openai.protocol import (
+    DetokenizeRequest,
+    DetokenizeResponse,
+    ErrorResponse,
+    TokenizeChatRequest,
+    TokenizeRequest,
+    TokenizeResponse,
+    TokenizerInfoResponse,
+)
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
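The yapf: disable guards existed because yapf and isort fought over wrapped imports; with a single tool they become unnecessary. The exploded import style above comes from the black-style magic trailing comma, which ruff's formatter honors by default. A small standalone illustration, assuming the default line length:

# With a trailing comma after the last name, ruff format keeps the
# bracketed list exploded, one name per line:
from typing import (
    Optional,
    Union,
)

# Without a magic trailing comma, the same import collapses onto a
# single line whenever it fits:
from typing import Optional, Union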
@@ -30,7 +34,6 @@ logger = init_logger(__name__)
 class OpenAIServingTokenization(OpenAIServing):
-
     def __init__(
         self,
         engine_client: EngineClient,
@@ -43,11 +46,13 @@ class OpenAIServingTokenization(OpenAIServing):
         trust_request_chat_template: bool = False,
         log_error_stack: bool = False,
     ) -> None:
-        super().__init__(engine_client=engine_client,
-                         model_config=model_config,
-                         models=models,
-                         request_logger=request_logger,
-                         log_error_stack=log_error_stack)
+        super().__init__(
+            engine_client=engine_client,
+            model_config=model_config,
+            models=models,
+            request_logger=request_logger,
+            log_error_stack=log_error_stack,
+        )
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
@@ -71,13 +76,15 @@ class OpenAIServingTokenization(OpenAIServing):
         renderer = self._get_renderer(tokenizer)
 
         if isinstance(request, TokenizeChatRequest):
-            tool_dicts = (None if request.tools is None else
-                          [tool.model_dump() for tool in request.tools])
+            tool_dicts = (
+                None
+                if request.tools is None
+                else [tool.model_dump() for tool in request.tools]
+            )
             error_check_ret = self._validate_chat_template(
                 request_chat_template=request.chat_template,
                 chat_template_kwargs=request.chat_template_kwargs,
-                trust_request_chat_template=self.
-                trust_request_chat_template,
+                trust_request_chat_template=self.trust_request_chat_template,
             )
             if error_check_ret is not None:
                 return error_check_ret
@@ -91,8 +98,7 @@ class OpenAIServingTokenization(OpenAIServing):
                 request.messages,
                 tool_dicts=tool_dicts,
                 chat_template=request.chat_template or self.chat_template,
-                chat_template_content_format=self.
-                chat_template_content_format,
+                chat_template_content_format=self.chat_template_content_format,
                 add_generation_prompt=request.add_generation_prompt,
                 continue_final_message=request.continue_final_message,
                 chat_template_kwargs=request.chat_template_kwargs,
@@ -109,23 +115,23 @@ class OpenAIServingTokenization(OpenAIServing):
         input_ids: list[int] = []
         for engine_prompt in engine_prompts:
-            self._log_inputs(request_id,
-                             engine_prompt,
-                             params=None,
-                             lora_request=lora_request)
+            self._log_inputs(
+                request_id, engine_prompt, params=None, lora_request=lora_request
+            )
-            if isinstance(engine_prompt,
-                          dict) and "prompt_token_ids" in engine_prompt:
+            if isinstance(engine_prompt, dict) and "prompt_token_ids" in engine_prompt:
                 input_ids.extend(engine_prompt["prompt_token_ids"])
 
         token_strs = None
         if request.return_token_strs:
             token_strs = tokenizer.convert_ids_to_tokens(input_ids)
 
-        return TokenizeResponse(tokens=input_ids,
-                                token_strs=token_strs,
-                                count=len(input_ids),
-                                max_model_len=self.max_model_len)
+        return TokenizeResponse(
+            tokens=input_ids,
+            token_strs=token_strs,
+            count=len(input_ids),
+            max_model_len=self.max_model_len,
+        )
 
     async def create_detokenize(
         self,
@@ -142,10 +148,9 @@ class OpenAIServingTokenization(OpenAIServing):
         tokenizer = await self.engine_client.get_tokenizer()
 
-        self._log_inputs(request_id,
-                         request.tokens,
-                         params=None,
-                         lora_request=lora_request)
+        self._log_inputs(
+            request_id, request.tokens, params=None, lora_request=lora_request
+        )
 
         prompt_input = await self._tokenize_prompt_input_async(
             request,
@@ -157,15 +162,15 @@ class OpenAIServingTokenization(OpenAIServing):
         return DetokenizeResponse(prompt=input_text)
 
     async def get_tokenizer_info(
-            self, ) -> Union[TokenizerInfoResponse, ErrorResponse]:
+        self,
+    ) -> Union[TokenizerInfoResponse, ErrorResponse]:
         """Get comprehensive tokenizer information."""
         try:
             tokenizer = await self.engine_client.get_tokenizer()
             info = TokenizerInfo(tokenizer, self.chat_template).to_dict()
             return TokenizerInfoResponse(**info)
         except Exception as e:
-            return self.create_error_response(
-                f"Failed to get tokenizer info: {str(e)}")
+            return self.create_error_response(f"Failed to get tokenizer info: {str(e)}")
 
     def _build_render_config(self, request: TokenizeRequest) -> RenderConfig:
         return RenderConfig(add_special_tokens=request.add_special_tokens)
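For context, the class in this file backs the server's /tokenize and /detokenize routes. A hedged usage sketch against a locally running vLLM OpenAI-compatible server (the host, port, and model name below are assumptions; the field names follow the TokenizeRequest, TokenizeResponse, and DetokenizeRequest protocol classes imported above):

import requests

BASE = "http://localhost:8000"  # assumed server address
MODEL = "Qwen/Qwen2.5-0.5B-Instruct"  # assumed served model

# Tokenize a plain prompt; the JSON mirrors TokenizeResponse
# (tokens, count, max_model_len, plus token_strs when requested).
resp = requests.post(
    f"{BASE}/tokenize",
    json={"model": MODEL, "prompt": "Hello, world!", "return_token_strs": True},
)
body = resp.json()
print(body["count"], body["tokens"], body["token_strs"])

# Round-trip the ids through /detokenize (DetokenizeRequest.tokens);
# the response mirrors DetokenizeResponse and carries the decoded prompt.
resp = requests.post(
    f"{BASE}/detokenize",
    json={"model": MODEL, "tokens": body["tokens"]},
)
print(resp.json()["prompt"])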