[Core] Remove tokenizer group in vLLM (#24078)
Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
@@ -14,7 +14,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def server(zephyr_lora_added_tokens_files: str): # noqa: F811
|
||||
def server():
|
||||
args = [
|
||||
# use half precision for speed and memory savings in CI environment
|
||||
"--dtype",
|
||||
@@ -24,12 +24,6 @@ def server(zephyr_lora_added_tokens_files: str): # noqa: F811
|
||||
"--enforce-eager",
|
||||
"--max-num-seqs",
|
||||
"128",
|
||||
# lora config
|
||||
"--enable-lora",
|
||||
"--lora-modules",
|
||||
f"zephyr-lora2={zephyr_lora_added_tokens_files}",
|
||||
"--max-lora-rank",
|
||||
"64",
|
||||
"--enable-tokenizer-info-endpoint",
|
||||
]
|
||||
|
||||
@@ -38,10 +32,8 @@ def server(zephyr_lora_added_tokens_files: str): # noqa: F811
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def tokenizer_name(model_name: str,
|
||||
zephyr_lora_added_tokens_files: str): # noqa: F811
|
||||
return zephyr_lora_added_tokens_files if (
|
||||
model_name == "zephyr-lora2") else model_name
|
||||
def tokenizer_name(model_name: str):
|
||||
return model_name
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
@@ -53,7 +45,7 @@ async def client(server):
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name,tokenizer_name",
|
||||
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
|
||||
[(MODEL_NAME, MODEL_NAME)],
|
||||
indirect=["tokenizer_name"],
|
||||
)
|
||||
async def test_tokenize_completions(
|
||||
@@ -86,7 +78,7 @@ async def test_tokenize_completions(
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name,tokenizer_name",
|
||||
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
|
||||
[(MODEL_NAME, MODEL_NAME)],
|
||||
indirect=["tokenizer_name"],
|
||||
)
|
||||
async def test_tokenize_chat(
|
||||
@@ -148,7 +140,7 @@ async def test_tokenize_chat(
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name,tokenizer_name",
|
||||
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
|
||||
[(MODEL_NAME, MODEL_NAME)],
|
||||
indirect=["tokenizer_name"],
|
||||
)
|
||||
async def test_tokenize_chat_with_tools(
|
||||
@@ -225,7 +217,7 @@ async def test_tokenize_chat_with_tools(
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name, tokenizer_name",
|
||||
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
|
||||
[(MODEL_NAME, MODEL_NAME)],
|
||||
indirect=["tokenizer_name"],
|
||||
)
|
||||
async def test_tokenize_with_return_token_strs(
|
||||
@@ -260,7 +252,7 @@ async def test_tokenize_with_return_token_strs(
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name,tokenizer_name",
|
||||
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
|
||||
[(MODEL_NAME, MODEL_NAME)],
|
||||
indirect=["tokenizer_name"],
|
||||
)
|
||||
async def test_detokenize(
|
||||
@@ -287,7 +279,7 @@ async def test_detokenize(
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name,tokenizer_name",
|
||||
[(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
|
||||
[(MODEL_NAME, MODEL_NAME)],
|
||||
indirect=["tokenizer_name"],
|
||||
)
|
||||
async def test_tokenizer_info_basic(
|
||||
@@ -384,4 +376,4 @@ async def test_tokenizer_info_chat_template(server: RemoteOpenAIServer):
|
||||
if chat_template:
|
||||
assert isinstance(chat_template,
|
||||
str), ("Chat template should be a string")
|
||||
assert chat_template.strip(), "Chat template should not be empty"
|
||||
assert chat_template.strip(), "Chat template should not be empty"
|
||||
|
||||
Reference in New Issue
Block a user