[Core] Remove tokenizer group in vLLM (#24078)
Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
@@ -12,7 +12,6 @@ from tests.v1.engine.utils import (NUM_PROMPT_LOGPROBS_UNDER_TEST,
|
||||
generate_dummy_prompt_logprobs_tensors,
|
||||
generate_dummy_sample_logprobs)
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
|
||||
|
||||
from ...distributed.conftest import publisher_config, random_port # noqa: F401
|
||||
|
||||
@@ -24,7 +23,7 @@ EngineCorePromptLogprobsType = tuple[torch.Tensor, torch.Tensor]
|
||||
|
||||
def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors:
|
||||
"""Generate output processor dummy test vectors, without logprobs
|
||||
|
||||
|
||||
Returns:
|
||||
DummyOutputProcessorTestVectors instance with no logprobs
|
||||
"""
|
||||
@@ -48,9 +47,6 @@ def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors:
|
||||
]
|
||||
return DummyOutputProcessorTestVectors(
|
||||
tokenizer=tokenizer,
|
||||
tokenizer_group=init_tokenizer_from_configs(
|
||||
vllm_config.model_config, vllm_config.scheduler_config,
|
||||
vllm_config.lora_config),
|
||||
vllm_config=vllm_config,
|
||||
full_tokens=[tokenizer(text).input_ids for text in FULL_STRINGS],
|
||||
prompt_tokens=prompt_tokens,
|
||||
@@ -68,7 +64,7 @@ def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors:
|
||||
@pytest.fixture
|
||||
def dummy_test_vectors() -> DummyOutputProcessorTestVectors:
|
||||
"""Generate output processor dummy test vectors, with logprobs
|
||||
|
||||
|
||||
Returns:
|
||||
DummyOutputProcessorTestVectors instance with logprobs
|
||||
"""
|
||||
|
||||
@@ -43,7 +43,7 @@ def _ref_convert_id_to_token(
|
||||
[RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
||||
def test_incremental_detokenization(request_output_kind: RequestOutputKind,
|
||||
dummy_test_vectors):
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
|
||||
log_stats=False)
|
||||
engine_core = MockEngineCore(
|
||||
tokens_list=dummy_test_vectors.generation_tokens)
|
||||
@@ -382,7 +382,7 @@ def test_logprobs_processor(request_output_kind: RequestOutputKind,
|
||||
num_sample_logprobs: Optional[int],
|
||||
num_prompt_logprobs: Optional[int],
|
||||
dummy_test_vectors):
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
|
||||
log_stats=False)
|
||||
engine_core = MockEngineCore(
|
||||
tokens_list=dummy_test_vectors.generation_tokens,
|
||||
@@ -535,7 +535,7 @@ def test_stop_token(include_stop_str_in_output: bool,
|
||||
) # '<|end_of_text|>'
|
||||
stop_token_ids = [128009] if not is_eos_test else None # '<|eot_id|>'
|
||||
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
|
||||
log_stats=False)
|
||||
# Dummy engine core outputs, with control tokens suffixed to test stops
|
||||
suffix_token = ([eos_token_id] if is_eos_test else stop_token_ids)
|
||||
@@ -642,7 +642,7 @@ def test_stop_token(include_stop_str_in_output: bool,
|
||||
[None, NUM_SAMPLE_LOGPROBS_UNDER_TEST])
|
||||
def test_stop_string(include_stop_str_in_output: bool,
|
||||
num_sample_logprobs: Optional[int], dummy_test_vectors):
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
|
||||
log_stats=False)
|
||||
engine_core = MockEngineCore(
|
||||
tokens_list=dummy_test_vectors.generation_tokens,
|
||||
@@ -763,7 +763,7 @@ def test_stop_string(include_stop_str_in_output: bool,
|
||||
|
||||
|
||||
def test_iteration_stats(dummy_test_vectors):
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group,
|
||||
output_processor = OutputProcessor(dummy_test_vectors.tokenizer,
|
||||
log_stats=True)
|
||||
engine_core = MockEngineCore(dummy_test_vectors.generation_tokens)
|
||||
engine_core_timestamp = time.monotonic()
|
||||
|
||||
@@ -9,7 +9,6 @@ import torch
|
||||
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
|
||||
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.transformers_utils.tokenizer_group import TokenizerGroup
|
||||
from vllm.v1.engine import EngineCoreOutput, FinishReason
|
||||
from vllm.v1.outputs import LogprobsLists, LogprobsTensors
|
||||
|
||||
@@ -39,7 +38,7 @@ def _create_random_top_logprob_test_vector(
|
||||
upper: float,
|
||||
) -> torch.Tensor:
|
||||
"""Create a random vector of top logprob float values.
|
||||
|
||||
|
||||
Use to create fake sample logprobs for testing.
|
||||
|
||||
Note that a real production scenario would require
|
||||
@@ -63,7 +62,7 @@ def _create_random_top_logprob_test_matrix(
|
||||
upper: float,
|
||||
) -> torch.Tensor:
|
||||
"""Create a random matrix of top logprob float values.
|
||||
|
||||
|
||||
Use to create fake prompt logprobs for testing.
|
||||
|
||||
Note that a real production scenario would require
|
||||
@@ -296,7 +295,6 @@ def generate_dummy_prompt_logprobs_tensors(
|
||||
class DummyOutputProcessorTestVectors:
|
||||
"""Dummy test vectors for output processor tests"""
|
||||
tokenizer: GeneralTokenizerType
|
||||
tokenizer_group: TokenizerGroup
|
||||
vllm_config: EngineArgs
|
||||
full_tokens: list[list[int]] # Prompt + generated tokens
|
||||
prompt_tokens: list[list[int]]
|
||||
|
||||
Reference in New Issue
Block a user