[Perf] Disable inductor runtime asserts by default for serving performance (#37485)
Signed-off-by: tianrengao <terrygao87@gmail.com> Co-authored-by: Tianren Gao <tianren@fb.com>
This commit is contained in:
@@ -5,6 +5,7 @@ from contextlib import nullcontext
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from pydantic import ValidationError
|
||||
|
||||
from vllm.compilation.counter import compilation_counter
|
||||
@@ -612,3 +613,58 @@ def test_adjust_cudagraph_sizes_for_mamba_cache(
|
||||
# Invariant: last element == max_cudagraph_capture_size
|
||||
if expected_sizes:
|
||||
assert config.cudagraph_capture_sizes[-1] == config.max_cudagraph_capture_size
|
||||
|
||||
|
||||
def test_inductor_asserts_default_disabled(monkeypatch):
    """With the default (INFO) logging level, inductor runtime asserts
    must stay disabled by default on torch < 2.12."""
    monkeypatch.setenv("VLLM_LOGGING_LEVEL", "INFO")

    # Re-evaluate vllm.envs so the patched logging level is picked up.
    import importlib

    import vllm.envs

    importlib.reload(vllm.envs)

    config = CompilationConfig()
    if not _is_torch_equal_or_newer(torch.__version__, "2.12.0.dev"):
        # All three inductor assert families should default to disabled.
        for key in ("size_asserts", "alignment_asserts", "scalar_asserts"):
            assert config.inductor_compile_config.get(key) is False
|
||||
|
||||
|
||||
def test_inductor_asserts_enabled_in_debug(monkeypatch):
    """Setting VLLM_LOGGING_LEVEL=DEBUG should turn inductor runtime
    asserts back on for torch < 2.12."""
    monkeypatch.setenv("VLLM_LOGGING_LEVEL", "DEBUG")

    # Re-evaluate vllm.envs so the patched logging level is picked up.
    import importlib

    import vllm.envs

    importlib.reload(vllm.envs)

    config = CompilationConfig()
    if not _is_torch_equal_or_newer(torch.__version__, "2.12.0.dev"):
        # Debug logging flips every assert family to enabled.
        for key in ("size_asserts", "alignment_asserts", "scalar_asserts"):
            assert config.inductor_compile_config.get(key) is True
|
||||
|
||||
|
||||
def test_inductor_asserts_user_override(monkeypatch):
    """An explicit inductor_compile_config entry wins over the
    logging-level-derived default."""
    monkeypatch.setenv("VLLM_LOGGING_LEVEL", "INFO")

    # Re-evaluate vllm.envs so the patched logging level is picked up.
    import importlib

    import vllm.envs

    importlib.reload(vllm.envs)

    config = CompilationConfig(inductor_compile_config={"size_asserts": True})

    # The user-supplied value must be preserved verbatim...
    assert config.inductor_compile_config.get("size_asserts") is True
    # ...while untouched keys still get the INFO-level default (disabled).
    if not _is_torch_equal_or_newer(torch.__version__, "2.12.0.dev"):
        assert config.inductor_compile_config.get("alignment_asserts") is False
|
||||
|
||||
Reference in New Issue
Block a user