Revert "[Renderer] Separate out RendererConfig from ModelConfig (#30145)" (#30199)

Cyrus Leung
2025-12-07 16:00:22 +08:00
committed by GitHub
parent 27f4c2fd46
commit e83b7e379c
105 changed files with 797 additions and 969 deletions


@@ -29,8 +29,8 @@ from vllm.config.model import (
     HfOverrides,
     ModelDType,
     RunnerOption,
+    TokenizerMode,
 )
-from vllm.config.renderer import TokenizerMode
 from vllm.engine.arg_utils import EngineArgs
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
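For downstream code, the most visible piece of this revert is the import path: TokenizerMode moves back under vllm.config.model, and the vllm.config.renderer path introduced by #30145 goes away. A minimal sketch of the restored import, assuming only what this hunk shows; the model name and the "auto" literal are illustrative:

# Restored import path per the hunk above; vllm.config.renderer is removed by this revert.
from vllm.config.model import TokenizerMode
from vllm.engine.arg_utils import EngineArgs

mode: TokenizerMode = "auto"  # "auto" is the usual default value of this literal type
args = EngineArgs(model="facebook/opt-125m", tokenizer_mode=mode)  # illustrative model name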
@@ -343,7 +343,6 @@ class LLM:
         logger.info("Supported tasks: %s", supported_tasks)
         self.supported_tasks = supported_tasks
-        self.renderer_config = self.llm_engine.renderer_config
         self.model_config = self.llm_engine.model_config
         self.input_processor = self.llm_engine.input_processor
         self.io_processor = self.llm_engine.io_processor
@@ -809,13 +808,13 @@ class LLM:
         list_of_messages = [cast(list[ChatCompletionMessageParam], messages)]
         tokenizer = self.get_tokenizer()
-        renderer_config = self.renderer_config
+        model_config = self.model_config
         resolved_content_format = resolve_chat_template_content_format(
             chat_template,
             tools,
             chat_template_content_format,
             tokenizer,
-            renderer_config=renderer_config,
+            model_config=model_config,
         )
         _chat_template_kwargs: dict[str, Any] = dict(
@@ -834,7 +833,7 @@ class LLM:
             # the chat message parsing for it.
             conversation, mm_data, mm_uuids = parse_chat_messages(
                 msgs,
-                renderer_config,
+                model_config,
                 content_format=resolved_content_format,
             )
@@ -848,7 +847,7 @@ class LLM:
                 prompt_str = apply_hf_chat_template(
                     tokenizer=tokenizer,
                     conversation=conversation,
-                    renderer_config=renderer_config,
+                    model_config=model_config,
                     **_chat_template_kwargs,
                 )
                 # Special tokens are already included in chat templates so
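Taken together, the chat-path hunks restore the pre-#30145 convention: LLM.chat threads the engine's ModelConfig, rather than a separate RendererConfig, into the chat-template helpers. A minimal sketch of that restored convention, staying within the call shapes visible in this diff; the model name, message, and the "auto" format value are illustrative assumptions:

from vllm import LLM
from vllm.entrypoints.chat_utils import resolve_chat_template_content_format

llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct")  # illustrative chat model
tokenizer = llm.get_tokenizer()
model_config = llm.model_config  # restored attribute; llm.renderer_config no longer exists

# Same call shape as the hunk above: four positionals, then model_config as a
# keyword (previously renderer_config=...). Arguments not shown in the diff are
# left at None here on the assumption that they are optional.
content_format = resolve_chat_template_content_format(
    None,    # chat_template: fall back to the tokenizer's built-in template
    None,    # tools
    "auto",  # chat_template_content_format
    tokenizer,
    model_config=model_config,
)

# The public entrypoint is unchanged by the revert; only the config object used internally is.
outputs = llm.chat([{"role": "user", "content": "Hello!"}])
print(content_format, outputs[0].outputs[0].text)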
@@ -1291,7 +1290,6 @@ class LLM:
         lora_request: list[LoRARequest] | LoRARequest | None = None,
         tokenization_kwargs: dict[str, Any] | None = None,
     ) -> list[ScoringRequestOutput]:
-        renderer_config = self.renderer_config
         model_config = self.model_config
         if isinstance(tokenizer, MistralTokenizer):
@@ -1319,7 +1317,7 @@ class LLM:
         for q, d in input_pairs:
             _, engine_prompt = get_score_prompt(
-                renderer_config=renderer_config,
+                model_config=model_config,
                 data_1=q,
                 data_2=d,
                 tokenizer=tokenizer,