[Model] Pooling model activation supports per-request control via PoolingParams (#20538)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -3,6 +3,8 @@
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
from vllm.entrypoints.openai.protocol import ClassificationResponse
|
||||
|
||||
@@ -181,3 +183,32 @@ async def test_invocations(server: RemoteOpenAIServer):
|
||||
assert classification_data.keys() == invocation_data.keys()
|
||||
assert classification_data["probs"] == pytest.approx(
|
||||
invocation_data["probs"], rel=0.01)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_activation(server: RemoteOpenAIServer, model_name: str):
    """Check that the ``activation`` request field toggles the activation.

    The same input is sent to the ``classify`` endpoint with ``activation``
    set to ``None``, ``True`` and ``False``; the test then verifies that:

    * ``activation=None`` matches ``activation=True``;
    * ``activation=False`` yields different values;
    * applying softmax over the last dimension to the ``activation=False``
      output reproduces the ``activation=True`` output.

    All comparisons use an absolute tolerance of ``1e-2``.
    """
    input_text = ["This product was excellent and exceeded my expectations"]

    async def get_outputs(activation):
        """Return the ``probs`` of every response item as one tensor."""
        response = requests.post(server.url_for("classify"),
                                 json={
                                     "model": model_name,
                                     "input": input_text,
                                     "activation": activation
                                 })
        # Surface the HTTP failure itself instead of an opaque KeyError on
        # ``outputs["data"]`` when the server rejects the request.
        response.raise_for_status()
        outputs = response.json()
        return torch.tensor([x['probs'] for x in outputs["data"]])

    default = await get_outputs(activation=None)
    w_activation = await get_outputs(activation=True)
    wo_activation = await get_outputs(activation=False)

    assert torch.allclose(default, w_activation,
                          atol=1e-2), "Default should use activation."
    assert not torch.allclose(
        w_activation, wo_activation,
        atol=1e-2), "wo_activation should not use activation."
    assert torch.allclose(
        F.softmax(wo_activation, dim=-1), w_activation, atol=1e-2
    ), "w_activation should be close to activation(wo_activation)."
|
||||
|
||||
Reference in New Issue
Block a user