[CI/Build][Hardware][AMD] Fix v1/shutdown (#31997)

Signed-off-by: Ryan Rock <ryan.rock@amd.com>
This commit is contained in:
Ryan Rock
2026-01-14 22:01:42 -06:00
committed by GitHub
parent 8471b27df9
commit 15422ed3f7
3 changed files with 64 additions and 3 deletions

View File

@@ -0,0 +1,26 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
from collections.abc import Iterable
from pathlib import Path
import pytest
from vllm.platforms import current_platform
@pytest.fixture
def rocm_sitecustomize_factory(monkeypatch, tmp_path: Path):
    """Provide a callable that drops a ``sitecustomize.py`` onto PYTHONPATH.

    On ROCm the returned callable takes an iterable of source lines, writes
    them (newline-joined, with a trailing newline) to ``sitecustomize.py``
    inside the per-test ``tmp_path``, and prepends that directory to the
    ``PYTHONPATH`` environment variable through ``monkeypatch``.

    On every other platform the returned callable accepts one argument and
    does nothing.
    """

    def _noop(_) -> None:
        return None

    def _install(lines: Iterable[str]) -> None:
        payload = "\n".join(lines) + "\n"
        (tmp_path / "sitecustomize.py").write_text(payload)

        # Put tmp_path first; drop the inherited value when it is unset/empty
        # so no stray path separator ends up in the variable.
        inherited = os.getenv("PYTHONPATH")
        entries = [entry for entry in (str(tmp_path), inherited) if entry]
        monkeypatch.setenv("PYTHONPATH", os.pathsep.join(entries))

    return _install if current_platform.is_rocm() else _noop

View File

@@ -3,6 +3,7 @@
"""Test that we handle an Error in model forward and shutdown.""" """Test that we handle an Error in model forward and shutdown."""
import asyncio import asyncio
import inspect
import pytest import pytest
@@ -38,11 +39,22 @@ def evil_forward(self, *args, **kwargs):
return self.model(*args, **kwargs) return self.model(*args, **kwargs)
@pytest.fixture
def rocm_evil_forward(rocm_sitecustomize_factory):
    """Hand the ``evil_forward`` patch to the sitecustomize installer.

    The generated payload imports the names it needs, embeds the source text
    of ``evil_forward`` and rebinds ``LlamaForCausalLM.forward`` to it.
    """
    preamble = [
        "from vllm.distributed import get_tensor_model_parallel_rank",
        "from vllm.model_executor.models.llama import LlamaForCausalLM",
    ]
    # Copy the function's source verbatim so the generated module defines it.
    patch_source = inspect.getsource(evil_forward)
    rebinding = f"LlamaForCausalLM.forward = {evil_forward.__name__}"
    rocm_sitecustomize_factory([*preamble, patch_source, rebinding])
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize("tensor_parallel_size", [2, 1]) @pytest.mark.parametrize("tensor_parallel_size", [2, 1])
@pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("model", MODELS)
async def test_async_llm_model_error( async def test_async_llm_model_error(
monkeypatch, tensor_parallel_size: int, model: str monkeypatch, rocm_evil_forward, tensor_parallel_size: int, model: str
) -> None: ) -> None:
"""Test that AsyncLLM propagates a forward pass error and frees memory. """Test that AsyncLLM propagates a forward pass error and frees memory.
@@ -104,7 +116,11 @@ async def test_async_llm_model_error(
@pytest.mark.parametrize("tensor_parallel_size", [2, 1]) @pytest.mark.parametrize("tensor_parallel_size", [2, 1])
@pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("model", MODELS)
def test_llm_model_error( def test_llm_model_error(
monkeypatch, tensor_parallel_size: int, enable_multiprocessing: bool, model: str monkeypatch,
rocm_evil_forward,
tensor_parallel_size: int,
enable_multiprocessing: bool,
model: str,
) -> None: ) -> None:
"""Test that LLM propagates a forward pass error and frees memory. """Test that LLM propagates a forward pass error and frees memory.
TODO(andy) - LLM without multiprocessing; LLM with multiprocessing TODO(andy) - LLM without multiprocessing; LLM with multiprocessing

View File

@@ -2,6 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test that we handle a startup Error and shutdown.""" """Test that we handle a startup Error and shutdown."""
import inspect
import pytest import pytest
from tests.utils import wait_for_gpu_memory_to_clear from tests.utils import wait_for_gpu_memory_to_clear
@@ -28,12 +30,28 @@ def evil_method(self, *args, **kwargs):
return self.model(*args, **kwargs, intermediate_tensors=None) return self.model(*args, **kwargs, intermediate_tensors=None)
@pytest.fixture
def rocm_evil_method(rocm_sitecustomize_factory, request):
    """Hand the ``evil_method`` patch to the sitecustomize installer.

    The attribute of ``LlamaForCausalLM`` that gets rebound is taken from the
    requesting test's ``failing_method`` value. The generated payload imports
    the names it needs, embeds the source text of ``evil_method`` and assigns
    it to that attribute.
    """
    # Looked up through ``request`` rather than declared as an argument.
    target_attr = request.getfixturevalue("failing_method")
    preamble = [
        "from vllm.distributed import get_tensor_model_parallel_rank",
        "from vllm.model_executor.models.llama import LlamaForCausalLM",
    ]
    # Copy the function's source verbatim so the generated module defines it.
    patch_source = inspect.getsource(evil_method)
    rebinding = f"LlamaForCausalLM.{target_attr} = {evil_method.__name__}"
    rocm_sitecustomize_factory([*preamble, patch_source, rebinding])
@pytest.mark.timeout(SHUTDOWN_TEST_TIMEOUT_SEC) @pytest.mark.timeout(SHUTDOWN_TEST_TIMEOUT_SEC)
@pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("tensor_parallel_size", [2, 1]) @pytest.mark.parametrize("tensor_parallel_size", [2, 1])
@pytest.mark.parametrize("failing_method", ["forward", "load_weights"]) @pytest.mark.parametrize("failing_method", ["forward", "load_weights"])
def test_async_llm_startup_error( def test_async_llm_startup_error(
monkeypatch, model: str, tensor_parallel_size: int, failing_method: str monkeypatch,
rocm_evil_method,
model: str,
tensor_parallel_size: int,
failing_method: str,
) -> None: ) -> None:
"""Test that AsyncLLM propagates an __init__ error & frees memory. """Test that AsyncLLM propagates an __init__ error & frees memory.
Test profiling (forward()) and load weights failures. Test profiling (forward()) and load weights failures.
@@ -67,6 +85,7 @@ def test_async_llm_startup_error(
@pytest.mark.parametrize("failing_method", ["forward", "load_weights"]) @pytest.mark.parametrize("failing_method", ["forward", "load_weights"])
def test_llm_startup_error( def test_llm_startup_error(
monkeypatch, monkeypatch,
rocm_evil_method,
model: str, model: str,
tensor_parallel_size: int, tensor_parallel_size: int,
enable_multiprocessing: bool, enable_multiprocessing: bool,