[Perf] Disable inductor runtime asserts by default for serving performance (#37485)

Signed-off-by: tianrengao <terrygao87@gmail.com>
Co-authored-by: Tianren Gao <tianren@fb.com>
This commit is contained in:
Terry Gao
2026-03-24 16:37:51 -07:00
committed by GitHub
parent a0d487b2e1
commit 82580b10ac
3 changed files with 95 additions and 0 deletions

View File

@@ -5,6 +5,7 @@ from contextlib import nullcontext
from unittest.mock import MagicMock, patch
import pytest
import torch
from pydantic import ValidationError
from vllm.compilation.counter import compilation_counter
@@ -612,3 +613,58 @@ def test_adjust_cudagraph_sizes_for_mamba_cache(
# Invariant: last element == max_cudagraph_capture_size
if expected_sizes:
assert config.cudagraph_capture_sizes[-1] == config.max_cudagraph_capture_size
def test_inductor_asserts_default_disabled(monkeypatch):
    """Check that a default CompilationConfig turns inductor asserts off.

    Runs with VLLM_LOGGING_LEVEL=INFO. The three assert flags are only
    verified when the installed torch is older than 2.12.0.dev; on newer
    torch the test constructs the config and makes no assertions.
    """
    monkeypatch.setenv("VLLM_LOGGING_LEVEL", "INFO")

    import importlib

    import vllm.envs

    # Reload after patching the environment variable.
    # NOTE(review): presumably so vllm.envs picks up the new level - confirm.
    importlib.reload(vllm.envs)

    cfg = CompilationConfig()
    if _is_torch_equal_or_newer(torch.__version__, "2.12.0.dev"):
        return

    inductor_cfg = cfg.inductor_compile_config
    for flag in ("size_asserts", "alignment_asserts", "scalar_asserts"):
        assert inductor_cfg.get(flag) is False
def test_inductor_asserts_enabled_in_debug(monkeypatch):
    """Check that VLLM_LOGGING_LEVEL=DEBUG turns inductor asserts on.

    The three assert flags are only verified when the installed torch is
    older than 2.12.0.dev; on newer torch the test constructs the config
    and makes no assertions.
    """
    monkeypatch.setenv("VLLM_LOGGING_LEVEL", "DEBUG")

    import importlib

    import vllm.envs

    # Reload after patching the environment variable.
    # NOTE(review): presumably so vllm.envs picks up the new level - confirm.
    importlib.reload(vllm.envs)

    cfg = CompilationConfig()
    if _is_torch_equal_or_newer(torch.__version__, "2.12.0.dev"):
        return

    inductor_cfg = cfg.inductor_compile_config
    for flag in ("size_asserts", "alignment_asserts", "scalar_asserts"):
        assert inductor_cfg.get(flag) is True
def test_inductor_asserts_user_override(monkeypatch):
    """Check that an explicit inductor_compile_config entry is kept.

    Runs with VLLM_LOGGING_LEVEL=INFO and passes ``size_asserts=True``
    explicitly; that value must survive config construction. On torch
    older than 2.12.0.dev, ``alignment_asserts`` (not set by the caller)
    must still be False.
    """
    monkeypatch.setenv("VLLM_LOGGING_LEVEL", "INFO")

    import importlib

    import vllm.envs

    # Reload after patching the environment variable.
    # NOTE(review): presumably so vllm.envs picks up the new level - confirm.
    importlib.reload(vllm.envs)

    user_settings = {"size_asserts": True}
    cfg = CompilationConfig(inductor_compile_config=user_settings)

    # The user-provided flag is checked regardless of the torch version.
    assert cfg.inductor_compile_config.get("size_asserts") is True

    if _is_torch_equal_or_newer(torch.__version__, "2.12.0.dev"):
        return
    assert cfg.inductor_compile_config.get("alignment_asserts") is False