[Core] Remove tokenizer group in vLLM (#24078)

Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
2025-09-17 01:42:59 -07:00
parent c15309a730
commit 6c47f6bfa4
49 changed files with 276 additions and 934 deletions
--- a/tests/v1/engine/conftest.py
+++ b/tests/v1/engine/conftest.py
@@ -12,7 +12,6 @@ from tests.v1.engine.utils import (NUM_PROMPT_LOGPROBS_UNDER_TEST,
                                   generate_dummy_prompt_logprobs_tensors,
                                   generate_dummy_sample_logprobs)
 from vllm.engine.arg_utils import EngineArgs
-from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs

 from ...distributed.conftest import publisher_config, random_port  # noqa: F401

@@ -24,7 +23,7 @@ EngineCorePromptLogprobsType = tuple[torch.Tensor, torch.Tensor]

 def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors:
    """Generate output processor dummy test vectors, without logprobs
-    
+
    Returns:
      DummyOutputProcessorTestVectors instance with no logprobs
    """
@@ -48,9 +47,6 @@ def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors:
    ]
    return DummyOutputProcessorTestVectors(
        tokenizer=tokenizer,
-        tokenizer_group=init_tokenizer_from_configs(
-            vllm_config.model_config, vllm_config.scheduler_config,
-            vllm_config.lora_config),
        vllm_config=vllm_config,
        full_tokens=[tokenizer(text).input_ids for text in FULL_STRINGS],
        prompt_tokens=prompt_tokens,
@@ -68,7 +64,7 @@ def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors:
 @pytest.fixture
 def dummy_test_vectors() -> DummyOutputProcessorTestVectors:
    """Generate output processor dummy test vectors, with logprobs
-    
+
    Returns:
      DummyOutputProcessorTestVectors instance with logprobs
    """
--- a/tests/v1/engine/test_output_processor.py
+++ b/tests/v1/engine/test_output_processor.py
@@ -43,7 +43,7 @@ def _ref_convert_id_to_token(
    [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
 def test_incremental_detokenization(request_output_kind: RequestOutputKind,
                                    dummy_test_vectors):
-    output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
+    output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
                                       log_stats=False)
    engine_core = MockEngineCore(
        tokens_list=dummy_test_vectors.generation_tokens)
@@ -382,7 +382,7 @@ def test_logprobs_processor(request_output_kind: RequestOutputKind,
                            num_sample_logprobs: Optional[int],
                            num_prompt_logprobs: Optional[int],
                            dummy_test_vectors):
-    output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
+    output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
                                       log_stats=False)
    engine_core = MockEngineCore(
        tokens_list=dummy_test_vectors.generation_tokens,
@@ -535,7 +535,7 @@ def test_stop_token(include_stop_str_in_output: bool,
    )  # '<|end_of_text|>'
    stop_token_ids = [128009] if not is_eos_test else None  # '<|eot_id|>'

-    output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
+    output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
                                       log_stats=False)
    # Dummy engine core outputs, with control tokens suffixed to test stops
    suffix_token = ([eos_token_id] if is_eos_test else stop_token_ids)
@@ -642,7 +642,7 @@ def test_stop_token(include_stop_str_in_output: bool,
                         [None, NUM_SAMPLE_LOGPROBS_UNDER_TEST])
 def test_stop_string(include_stop_str_in_output: bool,
                     num_sample_logprobs: Optional[int], dummy_test_vectors):
-    output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
+    output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
                                       log_stats=False)
    engine_core = MockEngineCore(
        tokens_list=dummy_test_vectors.generation_tokens,
@@ -763,7 +763,7 @@ def test_stop_string(include_stop_str_in_output: bool,


 def test_iteration_stats(dummy_test_vectors):
-    output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
+    output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
                                       log_stats=True)
    engine_core = MockEngineCore(dummy_test_vectors.generation_tokens)
    engine_core_timestamp = time.monotonic()
--- a/tests/v1/engine/utils.py
+++ b/tests/v1/engine/utils.py
@@ -9,7 +9,6 @@ import torch
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

 from vllm.engine.arg_utils import EngineArgs
-from vllm.transformers_utils.tokenizer_group import TokenizerGroup
 from vllm.v1.engine import EngineCoreOutput, FinishReason
 from vllm.v1.outputs import LogprobsLists, LogprobsTensors

@@ -39,7 +38,7 @@ def _create_random_top_logprob_test_vector(
    upper: float,
 ) -> torch.Tensor:
    """Create a random vector of top logprob float values.
-    
+
    Use to create fake sample logprobs for testing.

    Note that a real production scenario would require
@@ -63,7 +62,7 @@ def _create_random_top_logprob_test_matrix(
    upper: float,
 ) -> torch.Tensor:
    """Create a random matrix of top logprob float values.
-    
+
    Use to create fake prompt logprobs for testing.

    Note that a real production scenario would require
@@ -296,7 +295,6 @@ def generate_dummy_prompt_logprobs_tensors(
 class DummyOutputProcessorTestVectors:
    """Dummy test vectors for output processor tests"""
    tokenizer: GeneralTokenizerType
-    tokenizer_group: TokenizerGroup
    vllm_config: EngineArgs
    full_tokens: list[list[int]]  # Prompt + generated tokens
    prompt_tokens: list[list[int]]