[Bugfix] Replace PoolingParams.normalize with use_activation (#32243)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-13 18:45:42 +08:00
parent 11b6af5280
commit 0aa8c40552
21 changed files with 68 additions and 70 deletions
--- a/tests/entrypoints/pooling/embed/test_offline.py
+++ b/tests/entrypoints/pooling/embed/test_offline.py
@@ -53,7 +53,9 @@ def test_token_embed(llm: LLM):
 def test_pooling_params(llm: LLM):
    def get_outputs(normalize):
        outputs = llm.embed(
-            prompts, pooling_params=PoolingParams(normalize=normalize), use_tqdm=False
+            prompts,
+            pooling_params=PoolingParams(use_activation=normalize),
+            use_tqdm=False,
        )
        return torch.tensor([x.outputs.embedding for x in outputs])

--- a/tests/entrypoints/pooling/embed/test_online_long_text.py
+++ b/tests/entrypoints/pooling/embed/test_online_long_text.py
@@ -216,7 +216,7 @@ def server_with_chunked_processing():
        "512",  # Set smaller max_model_len to trigger chunking mechanism
        "--pooler-config",
        (
-            '{"pooling_type": "MEAN", "normalize": true, '
+            '{"pooling_type": "MEAN", "use_activation": true, '
            '"enable_chunked_processing": true, "max_embed_len": 10000}'
        ),
        "--gpu-memory-utilization",