Support encoder-only models without KV-Cache (#21270)
Signed-off-by: Max de Bayser <maxdebayser@gmail.com> Signed-off-by: Max de Bayser <mbayser@br.ibm.com> Co-authored-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
committed by
GitHub
parent
f27fdfc3ed
commit
1cd6eaba54
@@ -22,10 +22,12 @@ REVISION_ROBERTA = os.environ.get("REVISION", "main")
|
||||
|
||||
@pytest.mark.skipif(current_platform.is_rocm(),
|
||||
reason="Xformers backend is not supported on ROCm.")
|
||||
def test_model_loading_with_params(vllm_runner):
|
||||
def test_model_loading_with_params(vllm_runner, monkeypatch):
|
||||
"""
|
||||
Test parameter weight loading with tp>1.
|
||||
"""
|
||||
# to use apply_model
|
||||
monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
|
||||
with vllm_runner(model_name=MODEL_NAME,
|
||||
revision=REVISION,
|
||||
dtype="float16",
|
||||
@@ -61,10 +63,12 @@ def test_model_loading_with_params(vllm_runner):
|
||||
|
||||
@pytest.mark.skipif(current_platform.is_rocm(),
|
||||
reason="Xformers backend is not supported on ROCm.")
|
||||
def test_roberta_model_loading_with_params(vllm_runner):
|
||||
def test_roberta_model_loading_with_params(vllm_runner, monkeypatch):
|
||||
"""
|
||||
Test parameter weight loading with tp>1.
|
||||
"""
|
||||
# to use apply_model
|
||||
monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
|
||||
with vllm_runner(model_name=MODEL_NAME_ROBERTA,
|
||||
revision=REVISION_ROBERTA,
|
||||
dtype="float16",
|
||||
@@ -101,10 +105,12 @@ def test_roberta_model_loading_with_params(vllm_runner):
|
||||
|
||||
@pytest.mark.skipif(current_platform.is_rocm(),
|
||||
reason="Xformers backend is not supported on ROCm.")
|
||||
def test_facebook_roberta_model_loading_with_params(vllm_runner):
|
||||
def test_facebook_roberta_model_loading_with_params(vllm_runner, monkeypatch):
|
||||
"""
|
||||
Test loading roberta-base model with no lm_head.
|
||||
"""
|
||||
# to use apply_model
|
||||
monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
|
||||
model_name = "FacebookAI/roberta-base"
|
||||
with vllm_runner(model_name=model_name,
|
||||
dtype="float16",
|
||||
|
||||
Reference in New Issue
Block a user