[Chore] Remove redundant RequestPrompt (#30612)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -27,7 +27,7 @@ from vllm.entrypoints.serve.disagg.protocol import (
     GenerateResponse,
     GenerateResponseChoice,
 )
-from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
+from vllm.inputs.data import TokensPrompt
 from vllm.logger import init_logger
 from vllm.logprobs import Logprob
 from vllm.outputs import RequestOutput
@@ -99,7 +99,7 @@ class ServingTokens(OpenAIServing):

         # TODO(NickLucche): Change to EngineCoreRequest once Renderer work is
         # completed
-        engine_prompt = EngineTokensPrompt(prompt_token_ids=request.token_ids)
+        engine_prompt = TokensPrompt(prompt_token_ids=request.token_ids)
         if request.features is not None:
             engine_prompt["multi_modal_data"] = None
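Since `EngineTokensPrompt` was only an import alias for `vllm.inputs.data.TokensPrompt`, the alias is dropped and the class is used under its own name. As a minimal sketch of what the updated line constructs (the token ids here are illustrative, not taken from the request above), `TokensPrompt` is a TypedDict, so optional keys such as `multi_modal_data` can be assigned after construction:

    from vllm.inputs.data import TokensPrompt

    # Illustrative ids; in the hunk above, request.token_ids supplies them.
    engine_prompt = TokensPrompt(prompt_token_ids=[101, 7592, 2088, 102])
    # Optional key, set after construction as in the hunk above.
    engine_prompt["multi_modal_data"] = None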
@@ -115,7 +115,7 @@ class ServingTokens(OpenAIServing):

         self._log_inputs(
             request_id,
-            request.token_ids,
+            TokensPrompt(prompt_token_ids=request.token_ids),
             params=sampling_params,
             lora_request=lora_request,
         )
@@ -21,6 +21,7 @@ from vllm.entrypoints.openai.protocol import (
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.renderer import RenderConfig
+from vllm.inputs import TokensPrompt
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
@@ -80,11 +81,8 @@ class OpenAIServingTokenization(OpenAIServing):
         )
         if error_check_ret is not None:
             return error_check_ret
-        (
-            _,
-            _,
-            engine_prompts,
-        ) = await self._preprocess_chat(
+        _, engine_prompts = await self._preprocess_chat(
             request,
             tokenizer,
             request.messages,
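Consistent with removing the redundant `RequestPrompt`, this hunk shows `_preprocess_chat` now returning two values instead of three; the dropped middle element was already discarded as `_` at this call site. A hedged, runnable sketch of the new unpacking contract (the stub and its return values are assumptions for illustration, not the actual method):

    import asyncio

    async def _preprocess_chat_stub():
        # Hypothetical stand-in returning the new 2-tuple shape:
        # (conversation, engine_prompts) instead of the old 3-tuple.
        conversation = [{"role": "user", "content": "hi"}]
        engine_prompts = [{"prompt_token_ids": [101, 102]}]
        return conversation, engine_prompts

    async def main():
        # New call-site pattern from the hunk above:
        _, engine_prompts = await _preprocess_chat_stub()
        print(engine_prompts)

    asyncio.run(main())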
@@ -141,7 +139,10 @@ class OpenAIServingTokenization(OpenAIServing):
         tokenizer = await self.engine_client.get_tokenizer()

         self._log_inputs(
-            request_id, request.tokens, params=None, lora_request=lora_request
+            request_id,
+            TokensPrompt(prompt_token_ids=request.tokens),
+            params=None,
+            lora_request=lora_request,
         )

         prompt_input = await self._tokenize_prompt_input_async(
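Both `_log_inputs` call sites now pass a `TokensPrompt` rather than a bare token-id list, so logging always receives a prompt object. A hedged sketch of the wrapping pattern (the `log_token_prompt` helper and its arguments are hypothetical; only the `_log_inputs` call shape comes from the diff):

    from vllm.inputs import TokensPrompt

    def log_token_prompt(serving, request_id: str, token_ids: list[int]) -> None:
        # Hypothetical helper: wrap raw ids so _log_inputs sees a prompt object.
        serving._log_inputs(
            request_id,
            TokensPrompt(prompt_token_ids=token_ids),
            params=None,
            lora_request=None,
        )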