[Chore] Remove redundant RequestPrompt (#30612)

Author: Cyrus Leung
Date: 2025-12-14 17:22:37 +08:00
Committed by: GitHub
Commit: dcb31196da
Parent: f569c654e1
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

12 changed files with 188 additions and 253 deletions
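
The change is mechanical: the EngineTokensPrompt import alias and the intermediate RequestPrompt wrapper go away, and call sites construct vllm.inputs.TokensPrompt directly, including when logging inputs. A minimal sketch of the pattern the commit converges on (the token ids are made up for illustration):

    from vllm.inputs import TokensPrompt

    token_ids = [1, 15043, 3186]  # hypothetical pre-tokenized prompt

    engine_prompt = TokensPrompt(prompt_token_ids=token_ids)
    # TokensPrompt is a TypedDict, so optional fields are set item-style,
    # as in the hunks below:
    engine_prompt["multi_modal_data"] = None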

@@ -27,7 +27,7 @@ from vllm.entrypoints.serve.disagg.protocol import (
     GenerateResponse,
     GenerateResponseChoice,
 )
-from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
+from vllm.inputs.data import TokensPrompt
 from vllm.logger import init_logger
 from vllm.logprobs import Logprob
 from vllm.outputs import RequestOutput
@@ -99,7 +99,7 @@ class ServingTokens(OpenAIServing):
         # TODO(NickLucche): Change to EngineCoreRequest once Renderer work is
         # completed
-        engine_prompt = EngineTokensPrompt(prompt_token_ids=request.token_ids)
+        engine_prompt = TokensPrompt(prompt_token_ids=request.token_ids)
         if request.features is not None:
             engine_prompt["multi_modal_data"] = None
@@ -115,7 +115,7 @@ class ServingTokens(OpenAIServing):
         self._log_inputs(
             request_id,
-            request.token_ids,
+            TokensPrompt(prompt_token_ids=request.token_ids),
             params=sampling_params,
             lora_request=lora_request,
         )
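
In the ServingTokens path above, _log_inputs now receives a prompt object rather than a bare list of token ids, matching how text prompts are logged. A hedged illustration of the benefit; this stand-in function is not vLLM's actual _log_inputs:

    from vllm.inputs import TokensPrompt

    def log_inputs_sketch(request_id: str, prompt: "TokensPrompt | str") -> None:
        # One prompt-shaped argument covers both text prompts and
        # pre-tokenized prompts.
        if isinstance(prompt, str):
            print(f"{request_id}: text prompt {prompt!r}")
        else:
            print(f"{request_id}: {len(prompt['prompt_token_ids'])} prompt tokens")

    log_inputs_sketch("req-0", TokensPrompt(prompt_token_ids=[1, 2, 3]))

The remaining hunks apply the analogous cleanup to OpenAIServingTokenization.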

@@ -21,6 +21,7 @@ from vllm.entrypoints.openai.protocol import (
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.renderer import RenderConfig
+from vllm.inputs import TokensPrompt
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
@@ -80,11 +81,8 @@ class OpenAIServingTokenization(OpenAIServing):
         )
         if error_check_ret is not None:
             return error_check_ret
-        (
-            _,
-            _,
-            engine_prompts,
-        ) = await self._preprocess_chat(
+        _, engine_prompts = await self._preprocess_chat(
             request,
             tokenizer,
             request.messages,
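
The unpacking change above reflects _preprocess_chat returning one fewer value: the middle element, the now-redundant RequestPrompt list, is dropped, leaving the conversation and the engine prompts. A stand-in coroutine with the assumed post-change shape (names and return types are illustrative, not vLLM's exact signature):

    import asyncio

    from vllm.inputs import TokensPrompt

    async def preprocess_chat_sketch() -> "tuple[list[dict], list[TokensPrompt]]":
        # Assumed new shape: (conversation, engine_prompts); the old middle
        # RequestPrompt element no longer exists.
        conversation = [{"role": "user", "content": "hi"}]
        engine_prompts = [TokensPrompt(prompt_token_ids=[1, 2, 3])]
        return conversation, engine_prompts

    _, engine_prompts = asyncio.run(preprocess_chat_sketch())
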
@@ -141,7 +139,10 @@ class OpenAIServingTokenization(OpenAIServing):
         tokenizer = await self.engine_client.get_tokenizer()
         self._log_inputs(
-            request_id, request.tokens, params=None, lora_request=lora_request
+            request_id,
+            TokensPrompt(prompt_token_ids=request.tokens),
+            params=None,
+            lora_request=lora_request,
         )
         prompt_input = await self._tokenize_prompt_input_async(