[Core] Remove tokenizer group in vLLM (#24078)

Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
Zhuohan Li
2025-09-17 01:42:59 -07:00
committed by GitHub
parent c15309a730
commit 6c47f6bfa4
49 changed files with 276 additions and 934 deletions

View File

@@ -14,7 +14,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@pytest.fixture(scope="module")
def server(zephyr_lora_added_tokens_files: str): # noqa: F811
def server():
args = [
# use half precision for speed and memory savings in CI environment
"--dtype",
@@ -24,12 +24,6 @@ def server(zephyr_lora_added_tokens_files: str): # noqa: F811
"--enforce-eager",
"--max-num-seqs",
"128",
# lora config
"--enable-lora",
"--lora-modules",
f"zephyr-lora2={zephyr_lora_added_tokens_files}",
"--max-lora-rank",
"64",
"--enable-tokenizer-info-endpoint",
]
@@ -38,10 +32,8 @@ def server(zephyr_lora_added_tokens_files: str): # noqa: F811
@pytest.fixture(scope="module")
def tokenizer_name(model_name: str,
zephyr_lora_added_tokens_files: str): # noqa: F811
return zephyr_lora_added_tokens_files if (
model_name == "zephyr-lora2") else model_name
def tokenizer_name(model_name: str):
return model_name
@pytest_asyncio.fixture
@@ -53,7 +45,7 @@ async def client(server):
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model_name,tokenizer_name",
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
[(MODEL_NAME, MODEL_NAME)],
indirect=["tokenizer_name"],
)
async def test_tokenize_completions(
@@ -86,7 +78,7 @@ async def test_tokenize_completions(
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model_name,tokenizer_name",
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
[(MODEL_NAME, MODEL_NAME)],
indirect=["tokenizer_name"],
)
async def test_tokenize_chat(
@@ -148,7 +140,7 @@ async def test_tokenize_chat(
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model_name,tokenizer_name",
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
[(MODEL_NAME, MODEL_NAME)],
indirect=["tokenizer_name"],
)
async def test_tokenize_chat_with_tools(
@@ -225,7 +217,7 @@ async def test_tokenize_chat_with_tools(
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model_name, tokenizer_name",
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
[(MODEL_NAME, MODEL_NAME)],
indirect=["tokenizer_name"],
)
async def test_tokenize_with_return_token_strs(
@@ -260,7 +252,7 @@ async def test_tokenize_with_return_token_strs(
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model_name,tokenizer_name",
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
[(MODEL_NAME, MODEL_NAME)],
indirect=["tokenizer_name"],
)
async def test_detokenize(
@@ -287,7 +279,7 @@ async def test_detokenize(
@pytest.mark.asyncio
@pytest.mark.parametrize(
"model_name,tokenizer_name",
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
[(MODEL_NAME, MODEL_NAME)],
indirect=["tokenizer_name"],
)
async def test_tokenizer_info_basic(
@@ -384,4 +376,4 @@ async def test_tokenizer_info_chat_template(server: RemoteOpenAIServer):
if chat_template:
assert isinstance(chat_template,
str), ("Chat template should be a string")
assert chat_template.strip(), "Chat template should not be empty"
assert chat_template.strip(), "Chat template should not be empty"