[Misc] Refactor tokenizer interface (#29693)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -14,8 +14,9 @@ from vllm.entrypoints.openai.protocol import (
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
|
||||
+from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
|
||||
-from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
|
||||
+from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
# Use a common model that is likely to be available
|
||||
MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"
|
||||
@@ -173,7 +174,7 @@ def test_extract_tool_calls(
|
||||
|
||||
def stream_delta_message_generator(
|
||||
ernie45_tool_parser: Ernie45ToolParser,
|
||||
-ernie45_tokenizer: AnyTokenizer,
|
||||
+ernie45_tokenizer: TokenizerLike,
|
||||
model_output: str,
|
||||
request: ChatCompletionRequest | None = None,
|
||||
) -> Generator[DeltaMessage, None, None]:
|
||||
|
||||
@@ -10,8 +10,9 @@ from partial_json_parser.core.options import Allow
|
||||
|
||||
from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
|
||||
from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
|
||||
+from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
|
||||
-from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
|
||||
+from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
@@ -44,7 +45,9 @@ def assert_tool_calls(
|
||||
|
||||
|
||||
def stream_delta_message_generator(
|
||||
-jamba_tool_parser: JambaToolParser, jamba_tokenizer: AnyTokenizer, model_output: str
|
||||
+jamba_tool_parser: JambaToolParser,
|
||||
+jamba_tokenizer: TokenizerLike,
|
||||
+model_output: str,
|
||||
) -> Generator[DeltaMessage, None, None]:
|
||||
all_token_ids = jamba_tokenizer.encode(model_output, add_special_tokens=False)
|
||||
|
||||
|
||||
@@ -17,8 +17,9 @@ from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
|
||||
Qwen3CoderToolParser,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
|
||||
+from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
|
||||
-from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
|
||||
+from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
@@ -104,7 +105,7 @@ def assert_tool_calls(
|
||||
|
||||
def stream_delta_message_generator(
|
||||
qwen3_tool_parser,
|
||||
-qwen3_tokenizer: AnyTokenizer,
|
||||
+qwen3_tokenizer: TokenizerLike,
|
||||
model_output: str,
|
||||
request: ChatCompletionRequest | None = None,
|
||||
) -> Generator[DeltaMessage, None, None]:
|
||||
|
||||
@@ -15,8 +15,9 @@ from vllm.entrypoints.openai.protocol import (
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
|
||||
+from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
|
||||
-from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
|
||||
+from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
@@ -256,7 +257,7 @@ def test_streaming_tool_calls_no_tools(seed_oss_tool_parser):
|
||||
|
||||
def stream_delta_message_generator(
|
||||
seed_oss_tool_parser: SeedOssToolParser,
|
||||
-seed_oss_tokenizer: AnyTokenizer,
|
||||
+seed_oss_tokenizer: TokenizerLike,
|
||||
model_output: str,
|
||||
request: ChatCompletionRequest | None = None,
|
||||
) -> Generator[DeltaMessage, None, None]:
|
||||
|
||||
@@ -13,8 +13,9 @@ from vllm.entrypoints.openai.protocol import (
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
|
||||
+from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
|
||||
-from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
|
||||
+from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
@@ -49,7 +50,7 @@ def assert_tool_calls(
|
||||
|
||||
def stream_delta_message_generator(
|
||||
xlam_tool_parser: xLAMToolParser,
|
||||
-xlam_tokenizer: AnyTokenizer,
|
||||
+xlam_tokenizer: TokenizerLike,
|
||||
model_output: str,
|
||||
request: ChatCompletionRequest | None = None,
|
||||
) -> Generator[DeltaMessage, None, None]:
|
||||
|
||||
Reference in New Issue
Block a user