[Core] Remove tokenizer group in vLLM (#24078)

Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
2025-09-17 01:42:59 -07:00
parent c15309a730
commit 6c47f6bfa4
49 changed files with 276 additions and 934 deletions
--- a/tests/entrypoints/openai/test_lora_adapters.py
+++ b/tests/entrypoints/openai/test_lora_adapters.py
@@ -67,12 +67,6 @@ def server_with_lora_modules_json(request, monkeypatch_module,
        "base_model_name": MODEL_NAME
    }

-    lora_module_2 = {
-        "name": "zephyr-lora2",
-        "path": zephyr_lora_files,
-        "base_model_name": MODEL_NAME
-    }
-
    args = [
        # use half precision for speed and memory savings in CI environment
        "--dtype",
@@ -84,7 +78,6 @@ def server_with_lora_modules_json(request, monkeypatch_module,
        "--enable-lora",
        "--lora-modules",
        json.dumps(lora_module_1),
-        json.dumps(lora_module_2),
        "--max-lora-rank",
        "64",
        "--max-cpu-loras",
@@ -121,7 +114,6 @@ async def test_static_lora_lineage(client: openai.AsyncOpenAI,
               for lora_model in lora_models)
    assert all(lora_model.parent == MODEL_NAME for lora_model in lora_models)
    assert lora_models[0].id == "zephyr-lora"
-    assert lora_models[1].id == "zephyr-lora2"


@pytest.mark.asyncio
@@ -209,7 +201,7 @@ async def test_dynamic_lora_badrequests(client: openai.AsyncOpenAI, tmp_path,
@pytest.mark.asyncio
 async def test_multiple_lora_adapters(client: openai.AsyncOpenAI, tmp_path,
                                      zephyr_lora_files):
-    """Validate that many loras can be dynamically registered and inferenced 
+    """Validate that many loras can be dynamically registered and inferenced
    with concurrently"""

    # This test file configures the server with --max-cpu-loras=2 and this test