[Model] Pooling model activation supports per request control by PoolingParams (#20538)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-08-05 15:37:00 +08:00
committed by GitHub
parent 811ac13d03
commit 586f286789
21 changed files with 948 additions and 173 deletions

View File

@@ -3,6 +3,8 @@
import pytest
import requests
import torch
import torch.nn.functional as F
from vllm.entrypoints.openai.protocol import ClassificationResponse
@@ -181,3 +183,32 @@ async def test_invocations(server: RemoteOpenAIServer):
assert classification_data.keys() == invocation_data.keys()
assert classification_data["probs"] == pytest.approx(
invocation_data["probs"], rel=0.01)
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_activation(server: RemoteOpenAIServer, model_name: str):
    """Check that the ``activation`` request field controls the output.

    The same input is sent to the ``classify`` endpoint three times, with
    ``activation`` set to ``None``, ``True`` and ``False``. The test asserts:

    * omitting the choice (``None``) matches ``activation=True``;
    * ``activation=False`` yields different values than ``activation=True``;
    * applying softmax over the last dimension of the ``activation=False``
      output reproduces the ``activation=True`` output (within ``atol=1e-2``).
    """
    input_text = ["This product was excellent and exceeded my expectations"]

    async def get_outputs(activation):
        """POST one classify request and return its ``probs`` as a tensor."""
        response = requests.post(server.url_for("classify"),
                                 json={
                                     "model": model_name,
                                     "input": input_text,
                                     "activation": activation
                                 })
        # Surface a failed request as an HTTP error right here; otherwise an
        # error payload would only show up later as a confusing failure while
        # parsing or indexing the body.
        response.raise_for_status()
        outputs = response.json()
        return torch.tensor([x['probs'] for x in outputs["data"]])

    default = await get_outputs(activation=None)
    w_activation = await get_outputs(activation=True)
    wo_activation = await get_outputs(activation=False)

    assert torch.allclose(default, w_activation,
                          atol=1e-2), "Default should use activation."
    assert not torch.allclose(
        w_activation, wo_activation,
        atol=1e-2), "wo_activation should not use activation."
    # The un-activated output is treated as raw scores: softmax over the
    # class dimension must recover the activated output.
    assert torch.allclose(
        F.softmax(wo_activation, dim=-1), w_activation, atol=1e-2
    ), "w_activation should be close to activation(wo_activation)."