From 625ccd1c4d1996a144b0167caefd150cf2956437 Mon Sep 17 00:00:00 2001
From: Jiangyun Zhu
Date: Thu, 14 Aug 2025 23:09:27 +0800
Subject: [PATCH] [Bugfix] Replace custom Encoding class with BatchEncoding in MistralTokenizer (#22786)

Signed-off-by: zjy0516
---
 vllm/transformers_utils/tokenizers/mistral.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py
index 6ccc636ef..4dd8b2439 100644
--- a/vllm/transformers_utils/tokenizers/mistral.py
+++ b/vllm/transformers_utils/tokenizers/mistral.py
@@ -2,13 +2,13 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import os
-from dataclasses import dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Optional, Union, cast
 
 import huggingface_hub
 import regex as re
 from huggingface_hub import HfApi, hf_hub_download
+from transformers.tokenization_utils_base import BatchEncoding
 
 from vllm.logger import init_logger
 from vllm.transformers_utils.tokenizer_base import TokenizerBase
@@ -27,11 +27,6 @@ if TYPE_CHECKING:
 logger = init_logger(__name__)
 
 
-@dataclass
-class Encoding:
-    input_ids: Union[list[int], list[list[int]]]
-
-
 def maybe_serialize_tool_calls(request: "ChatCompletionRequest"):
     # SEE: https://github.com/vllm-project/vllm/pull/9951
     # Credits go to: @gcalmettes
@@ -359,7 +354,7 @@ class MistralTokenizer(TokenizerBase):
         # For str, single prompt text
         else:
             input_ids = self.encode_one(text, truncation, max_length)
-        return Encoding(input_ids=input_ids)
+        return BatchEncoding({"input_ids": input_ids})
 
     def get_vocab(self) -> dict[str, int]:
         # NB: the dictionary form of the vocabulary collapses token ids that map