Add evaluate_guards option to DynamicShapesConfig (#27432)
Signed-off-by: Laith Sakka <lsakka@meta.com>
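What the new option does, as exercised by the tests below: with evaluate_guards enabled, the shape guards that torch.compile records during the first compilation are kept and re-checked, so a later input that violates a recorded specialization raises a RuntimeError instead of silently recompiling. A minimal configuration sketch, using only the classes and fields this diff itself imports and constructs (the behavioral summary is inferred from the tests, not from separate documentation):

from vllm.config import CompilationConfig, VllmConfig
from vllm.config.compilation import (
    CompilationMode,
    DynamicShapesConfig,
    DynamicShapesType,
)

# Sketch: enable the new flag on a VLLM_COMPILE config. BACKED is one of
# the dynamic-shapes types the new tests parametrize over.
vllm_config = VllmConfig(
    compilation_config=CompilationConfig(
        mode=CompilationMode.VLLM_COMPILE,
        dynamic_shapes_config=DynamicShapesConfig(
            type=DynamicShapesType.BACKED,
            evaluate_guards=True,  # the option added by this commit
        ),
    )
)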
@@ -2,12 +2,21 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import gc
+import tempfile
+from contextlib import contextmanager
 
 import pytest
 import torch
 
 from vllm import LLM, SamplingParams
-from vllm.config.compilation import CompilationMode, DynamicShapesType
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CompilationConfig, VllmConfig, set_current_vllm_config
+from vllm.config.compilation import (
+    CompilationMode,
+    DynamicShapesConfig,
+    DynamicShapesType,
+)
+from vllm.forward_context import set_forward_context
 from vllm.tokenizers import get_tokenizer
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
@@ -29,18 +38,19 @@ def test_dynamic_shapes_compilation(
 )
 @pytest.mark.parametrize("use_aot_compile", ["0"])
 @pytest.mark.parametrize("use_bytecode_hook", [True, False])
+@pytest.mark.parametrize("evaluate_guards", [False, True])
 @pytest.mark.skipif(
     not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
 )
 def test_dynamic_shapes_compilation(
-    monkeypatch, model_name, shapes_type, use_aot_compile, use_bytecode_hook
+    monkeypatch,
+    model_name,
+    shapes_type,
+    use_aot_compile,
+    use_bytecode_hook,
+    evaluate_guards,
 ):
     """Test that all dynamic shapes types compile successfully"""
-    print(
-        f"\nTesting model: {model_name} with {shapes_type.name}, "
-        f"AOT compile: {use_aot_compile}, "
-        f"Bytecode hook: {use_bytecode_hook}"
-    )
     if use_bytecode_hook and shapes_type == DynamicShapesType.UNBACKED:
         pytest.skip("UNBACKED dynamic shapes require VLLM_USE_BYTECODE_HOOK=0")
 
@@ -58,6 +68,7 @@ def test_dynamic_shapes_compilation(
             "mode": CompilationMode.VLLM_COMPILE,
             "dynamic_shapes_config": {
                 "type": shapes_type.value,
+                "evaluate_guards": evaluate_guards,
             },
         },
     )
@@ -86,3 +97,117 @@ def test_dynamic_shapes_compilation(
     torch.cuda.empty_cache()
     torch.cuda.synchronize()
     print("GPU memory cleared")
+
+
+@pytest.mark.parametrize("use_aot_compile", ["0", "1"])
+@pytest.mark.parametrize(
+    "dynamic_shapes_type",
+    [
+        DynamicShapesType.BACKED,
+        DynamicShapesType.BACKED_SIZE_OBLIVIOUS,
+    ],
+)
+@pytest.mark.parametrize("evaluate_guards", [False, True])
+def test_model_specialization_with_evaluate_guards(
+    monkeypatch, use_aot_compile, dynamic_shapes_type, evaluate_guards
+):
+    """Test that evaluate_guards correctly detects shape specialization
+    violations.
+    """
+
+    if (
+        use_aot_compile == "1"
+        and dynamic_shapes_type == DynamicShapesType.BACKED
+        and evaluate_guards
+    ):
+        pytest.skip("evaluate_guards for BACKED does not work with aot_compile=1")
+
+    @support_torch_compile
+    class ModelWithSizeCheck(torch.nn.Module):
+        def __init__(self, **kwargs):
+            super().__init__()
+
+        def forward(self, x: torch.Tensor):
+            # This will cause specialization - torch.compile will guard on
+            # x.shape[0]; both branches are identical, the check alone adds the guard.
+            if x.shape[0] >= 10:
+                return x * 10
+            else:
+                return x * 10
+
+    @support_torch_compile
+    class ModelWithOneSizeCheck(torch.nn.Module):
+        def __init__(self, **kwargs):
+            super().__init__()
+
+        def forward(self, x: torch.Tensor):
+            # This will cause 0/1 specializations.
+            if x.shape[0] == 0:
+                return x * 10
+            if x.shape[0] == 1:
+                return x * 10
+            else:
+                return x * 10
+
+    @contextmanager
+    def use_vllm_config(vllm_config: VllmConfig):
+        with set_forward_context({}, vllm_config), set_current_vllm_config(vllm_config):
+            yield
+
+    monkeypatch.setenv("TOKENIZERS_PARALLELISM", "true")
+    monkeypatch.setenv("VLLM_USE_AOT_COMPILE", use_aot_compile)
+    monkeypatch.setenv("VLLM_USE_BYTECODE_HOOK", "0")
+
+    # Create vllm config with the desired settings
+    from vllm.config import CompilationMode
+
+    vllm_config = VllmConfig(
+        compilation_config=CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            dynamic_shapes_config=DynamicShapesConfig(
+                type=dynamic_shapes_type,
+                evaluate_guards=evaluate_guards,
+            ),
+        )
+    )
+
+    def test(model_class, input1, input2, is_01_specialization=False):
+        with (
+            torch.no_grad(),
+            use_vllm_config(vllm_config),
+            tempfile.TemporaryDirectory() as tmpdirname,
+        ):
+            monkeypatch.setenv("VLLM_CACHE_ROOT", tmpdirname)
+
+            model = model_class(vllm_config=vllm_config).cuda()
+
+            model(input1)
+
+            if evaluate_guards and (
+                not (
+                    is_01_specialization
+                    and dynamic_shapes_type == DynamicShapesType.BACKED
+                )
+            ):
+                # This should fail because guards were added.
+                with pytest.raises(RuntimeError) as excinfo:
+                    model(input2)
+
+                # Expected failure - guard was violated
+                error_msg = str(excinfo.value)
+                assert (
+                    "GuardManager check failed" in error_msg
+                    or "Detected recompile when torch.compile stance" in error_msg
+                ), error_msg
+
+            else:
+                model(input2)
+
+    test(ModelWithSizeCheck, torch.randn(20, 10).cuda(), torch.randn(5, 10).cuda())
+    test(ModelWithSizeCheck, torch.randn(5, 10).cuda(), torch.randn(20, 10).cuda())
+    test(
+        ModelWithOneSizeCheck,
+        torch.randn(20, 10).cuda(),
+        torch.randn(1, 10).cuda(),
+        is_01_specialization=True,
+    )
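The assertion above accepts two error messages; the second one comes from torch.compile's stance machinery. Below is a standalone PyTorch sketch of that failure mode - no vLLM involved, and it assumes torch >= 2.6, where torch.compiler.set_stance is available. It illustrates the mechanism the test asserts on (a shape-guard violation surfacing as a RuntimeError rather than a silent recompile); whether evaluate_guards sets this stance internally is not shown in this diff.

import torch

@torch.compile(dynamic=True)
def f(x):
    # Shape-dependent branch: compiling f installs a guard on x.shape[0].
    if x.shape[0] >= 10:
        return x * 10
    return x * 10

f(torch.randn(20, 10))  # first call compiles; the guard records s0 >= 10

# A guard-violating input would normally just trigger a recompile;
# with this stance it raises RuntimeError instead.
torch.compiler.set_stance("fail_on_recompile")
try:
    f(torch.randn(5, 10))  # s0 = 5 violates the recorded guard
except RuntimeError as err:
    print(err)  # contains "Detected recompile when torch.compile stance ..."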