Avoid bytecode hook and simplify TorchCompileWrapperWithCustomDispatcher (#25110)
Signed-off-by: Laith Sakka <lsakka@meta.com>
This commit is contained in:
@@ -22,6 +22,8 @@ from vllm.config import (
|
||||
from vllm.forward_context import BatchDescriptor, set_forward_context
|
||||
from vllm.utils.torch_utils import is_torch_equal_or_newer
|
||||
|
||||
from ...utils import create_new_process_for_each_test
|
||||
|
||||
# This import automatically registers `torch.ops.silly.attention`
|
||||
from .. import silly_attention # noqa: F401
|
||||
|
||||
@@ -193,7 +195,14 @@ def run_model(
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_inductor_graph_partition", [False, True])
|
||||
def test_multi_graph_piecewise_compile(use_inductor_graph_partition: bool):
|
||||
@pytest.mark.parametrize("use_bytecode_hook", [True, False])
|
||||
@create_new_process_for_each_test("spawn")
|
||||
def test_multi_graph_piecewise_compile(
|
||||
use_inductor_graph_partition: bool, use_bytecode_hook: bool, monkeypatch
|
||||
):
|
||||
# Set the environment variable for this test
|
||||
monkeypatch.setenv("VLLM_USE_BYTECODE_HOOK", "1" if use_bytecode_hook else "0")
|
||||
|
||||
if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
|
||||
pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user