[CI] execute all piecewise compilation tests together (#24502)

Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
2025-09-10 02:05:25 +08:00
parent c3f9773b2c
commit b8a93076d3
6 changed files with 81 additions and 117 deletions
--- a/tests/compile/piecewise/test_simple.py
+++ b/tests/compile/piecewise/test_simple.py
@@ -4,10 +4,10 @@
 Test the piecewise compilation with a simple model so that we
 can exactly calculate the expected output and side effects.
 """
+
 import pytest
 import torch
 from torch import nn
-from torch.library import Library

 from vllm.compilation.counter import compilation_counter
 from vllm.compilation.decorators import support_torch_compile
@@ -15,35 +15,9 @@ from vllm.config import (CompilationConfig, CompilationLevel, CUDAGraphMode,
                         VllmConfig, set_current_vllm_config)
 from vllm.envs import VLLM_USE_V1
 from vllm.forward_context import BatchDescriptor, set_forward_context
-from vllm.utils import direct_register_custom_op

-global_counter = 0
-
-# create a library to hold the custom op
-silly_lib = Library("silly", "FRAGMENT")  # noqa
-
-
-def silly_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
-                    out: torch.Tensor) -> None:
-    global global_counter
-    global_counter += 1
-    print(f"{global_counter=}")
-    out.copy_(q)
-    out[0] += 1
-
-
-def silly_attention_fake(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
-                         out: torch.Tensor) -> None:
-    return
-
-
-direct_register_custom_op(
-    op_name="attention",
-    op_func=silly_attention,
-    mutates_args=["out"],
-    fake_impl=silly_attention_fake,
-    target_lib=silly_lib,
-)
+# This import automatically registers `torch.ops.silly.attention`
+from ..silly_attention import get_global_counter, reset_global_counter


@support_torch_compile
@@ -59,8 +33,7 @@ class SillyModel(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Overall effect:
-        x += 1
-        x[0] += 2
+        x = 3 * x + 19
        global_counter += 2
        """
        x = x + 1
@@ -78,6 +51,7 @@ class SillyModel(nn.Module):


@pytest.mark.parametrize("use_inductor", [True, False])
+@torch.inference_mode()
 def test_simple_piecewise_compile(use_inductor):
    assert VLLM_USE_V1

@@ -121,13 +95,12 @@ def test_simple_piecewise_compile(use_inductor):
            model(torch.randn(1).cuda())

        input = torch.zeros(2).cuda()
-        global global_counter
-        global_counter = 0
+        reset_global_counter()
        with set_forward_context(
                None,
                vllm_config=vllm_config,
                cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE,
                batch_descriptor=BatchDescriptor(num_tokens=2, )):
            output = model(input)
-        assert global_counter == 2
-        assert torch.allclose(output.cpu(), torch.tensor([3., 1.]))
+        assert get_global_counter() == 2
+        assert torch.allclose(output.cpu(), torch.tensor([19.0, 19.0]))