[Frontend] Introduce Renderer for processing chat messages (using ModelConfig) (#30200)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -23,9 +23,10 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.outputs import PoolingRequestOutput, RequestOutput
 from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
+from vllm.renderers import RendererLike
 from vllm.sampling_params import SamplingParams
 from vllm.tasks import SupportedTask
-from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
+from vllm.tokenizers import TokenizerLike
 from vllm.tracing import init_tracer
 from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
 from vllm.usage.usage_lib import UsageContext
@@ -106,9 +107,7 @@ class AsyncLLM(EngineClient):
                 "enabling logging without default stat loggers."
             )
 
-        tokenizer = cached_tokenizer_from_config(self.model_config)
-
-        self.input_processor = InputProcessor(self.vllm_config, tokenizer)
+        self.input_processor = InputProcessor(self.vllm_config)
         self.io_processor = get_io_processor(
             self.vllm_config,
             self.model_config.io_processor_plugin,
@@ -709,13 +708,12 @@ class AsyncLLM(EngineClient):
     def tokenizer(self) -> TokenizerLike | None:
         return self.input_processor.tokenizer
 
-    async def get_tokenizer(self) -> TokenizerLike:
-        if self.tokenizer is None:
-            raise ValueError(
-                "Unable to get tokenizer because `skip_tokenizer_init=True`"
-            )
+    def get_tokenizer(self) -> TokenizerLike:
+        return self.input_processor.get_tokenizer()
 
-        return self.tokenizer
+    @property
+    def renderer(self) -> RendererLike:
+        return self.input_processor.renderer
 
     async def is_tracing_enabled(self) -> bool:
         return self.observability_config.otlp_traces_endpoint is not None  # type: ignore
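Taken together, the AsyncLLM hunks above move tokenizer construction out of the engine: InputProcessor now receives only the VllmConfig and builds (or skips) the tokenizer itself, while the engine's tokenizer accessors become thin delegates. Below is a minimal standalone sketch of that delegation pattern; the stub types stand in for vLLM's real ones, and the placement of the skip_tokenizer_init check inside the processor is an assumption (the diff only shows the check leaving the engine).

from dataclasses import dataclass


@dataclass
class VllmConfig:  # stand-in for vllm.config.VllmConfig
    skip_tokenizer_init: bool = False


def load_tokenizer(config: VllmConfig) -> object:
    # Stand-in for vLLM's real tokenizer loading.
    return object()


class InputProcessor:
    # Now owns tokenizer construction instead of receiving a tokenizer.
    def __init__(self, vllm_config: VllmConfig) -> None:
        self.vllm_config = vllm_config
        self._tokenizer = (
            None if vllm_config.skip_tokenizer_init else load_tokenizer(vllm_config)
        )

    @property
    def tokenizer(self) -> object | None:
        return self._tokenizer

    def get_tokenizer(self) -> object:
        # Assumed: the ValueError now lives in one place, down here.
        if self._tokenizer is None:
            raise ValueError(
                "Unable to get tokenizer because `skip_tokenizer_init=True`"
            )
        return self._tokenizer


class AsyncLLM:
    # Engine-side view: every tokenizer access delegates to the processor.
    def __init__(self, vllm_config: VllmConfig) -> None:
        self.input_processor = InputProcessor(vllm_config)

    @property
    def tokenizer(self) -> object | None:
        return self.input_processor.tokenizer

    def get_tokenizer(self) -> object:
        return self.input_processor.get_tokenizer()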
@@ -19,6 +19,7 @@ from vllm.multimodal.parse import MultiModalDataParser
 from vllm.multimodal.processing.context import set_request_id
 from vllm.multimodal.utils import argsort_mm_positions
 from vllm.pooling_params import PoolingParams
+from vllm.renderers import RendererLike
 from vllm.sampling_params import _SAMPLING_EPS, SamplingParams
 from vllm.tokenizers import TokenizerLike
 from vllm.tokenizers.mistral import MistralTokenizer
@@ -45,7 +46,6 @@ class InputProcessor:
     def __init__(
         self,
         vllm_config: VllmConfig,
-        tokenizer: TokenizerLike | None,
         mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
     ) -> None:
         self.vllm_config = vllm_config
@@ -61,8 +61,7 @@ class InputProcessor:
 
         self.input_preprocessor = InputPreprocessor(
             self.model_config,
-            tokenizer,
-            self.vllm_config.observability_config,
+            vllm_config.observability_config,
             mm_registry,
             mm_processor_cache=self.mm_processor_cache,
         )
@@ -71,6 +70,13 @@ class InputProcessor:
     def tokenizer(self) -> TokenizerLike | None:
         return self.input_preprocessor.tokenizer
 
+    def get_tokenizer(self) -> TokenizerLike:
+        return self.input_preprocessor.get_tokenizer()
+
+    @property
+    def renderer(self) -> RendererLike:
+        return self.input_preprocessor.renderer
+
     def _validate_logprobs(
         self,
         params: SamplingParams,
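The new renderer property completes a chain of forwarding accessors: engine -> InputProcessor -> InputPreprocessor. The diff never shows the RendererLike interface itself, so the following is only a hypothetical sketch of the kind of protocol the commit title implies ("Renderer for processing chat messages"); the render_messages name and signature are invented for illustration and are not taken from vllm.renderers.

from typing import Any, Protocol


class RendererLike(Protocol):
    # Hypothetical method; the real vllm.renderers interface is not
    # shown in this diff.
    def render_messages(self, messages: list[dict[str, Any]]) -> str: ...


class SimpleChatRenderer:
    # Toy conforming implementation: joins role-tagged messages into a prompt.
    def render_messages(self, messages: list[dict[str, Any]]) -> str:
        return "\n".join(f"{m['role']}: {m['content']}" for m in messages)


renderer: RendererLike = SimpleChatRenderer()
print(renderer.render_messages([{"role": "user", "content": "Hello!"}]))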
@@ -21,9 +21,10 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.outputs import PoolingRequestOutput, RequestOutput
 from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
+from vllm.renderers import RendererLike
 from vllm.sampling_params import SamplingParams
 from vllm.tasks import SupportedTask
-from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
+from vllm.tokenizers import TokenizerLike
 from vllm.tracing import init_tracer
 from vllm.usage.usage_lib import UsageContext
 from vllm.v1.engine import EngineCoreRequest
@@ -84,9 +85,7 @@ class LLMEngine:
         self.dp_group = None
         self.should_execute_dummy_batch = False
 
-        tokenizer = cached_tokenizer_from_config(self.model_config)
-
-        self.input_processor = InputProcessor(self.vllm_config, tokenizer)
+        self.input_processor = InputProcessor(self.vllm_config)
         self.io_processor = get_io_processor(
             self.vllm_config,
             self.model_config.io_processor_plugin,
@@ -357,12 +356,11 @@ class LLMEngine:
         return self.input_processor.tokenizer
 
     def get_tokenizer(self) -> TokenizerLike:
-        if self.tokenizer is None:
-            raise ValueError(
-                "Unable to get tokenizer because `skip_tokenizer_init=True`"
-            )
+        return self.input_processor.get_tokenizer()
 
-        return self.tokenizer
+    @property
+    def renderer(self) -> RendererLike:
+        return self.input_processor.renderer
 
     def do_log_stats(self) -> None:
         """Log stats if logging is enabled."""
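One behavioral note for call sites: on AsyncLLM, get_tokenizer() changes from a coroutine (async def) to a plain method, matching LLMEngine, so existing awaits must be dropped. A toy sketch of the before/after calling convention; the classes here are stand-ins, not the real engines.

import asyncio


class OldAsyncLLM:
    async def get_tokenizer(self) -> str:  # old: coroutine, must be awaited
        return "tokenizer"


class NewAsyncLLM:
    def get_tokenizer(self) -> str:  # new: plain method, call directly
        return "tokenizer"


print(asyncio.run(OldAsyncLLM().get_tokenizer()))  # old call site
print(NewAsyncLLM().get_tokenizer())  # new call site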