[RFC][vllm-API] Support tokenizer registry for customized tokenizer in vLLM (#12518)
Signed-off-by: Keyun Tong <tongkeyun@gmail.com>
This commit is contained in:
@@ -102,8 +102,9 @@ class ModelConfig:
 it; otherwise, you must specify explicitly which task to use.
 tokenizer: Name or path of the huggingface tokenizer to use.
 tokenizer_mode: Tokenizer mode. "auto" will use the fast tokenizer if
-available, "slow" will always use the slow tokenizer, and
-"mistral" will always use the tokenizer from `mistral_common`.
+available, "slow" will always use the slow tokenizer,
+"mistral" will always use the tokenizer from `mistral_common`, and
+"custom" will use --tokenizer to select the preregistered tokenizer.
 trust_remote_code: Trust remote code (e.g., from HuggingFace) when
 downloading the model and tokenizer.
 allowed_local_media_path: Allowing API requests to read local images or
@@ -467,10 +468,10 @@ class ModelConfig:

 def _verify_tokenizer_mode(self) -> None:
 tokenizer_mode = self.tokenizer_mode.lower()
-if tokenizer_mode not in ["auto", "slow", "mistral"]:
+if tokenizer_mode not in ["auto", "slow", "mistral", "custom"]:
 raise ValueError(
 f"Unknown tokenizer mode: {self.tokenizer_mode}. Must be "
-"either 'auto', 'slow' or 'mistral'.")
+"either 'auto', 'slow', 'mistral' or 'custom'.")
 self.tokenizer_mode = tokenizer_mode

 def _get_preferred_task(
Reference in New Issue
Block a user