[Misc] improve warning/assert messages (#32226)

Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
This commit is contained in:
cjackal
2026-01-13 12:11:23 +09:00
committed by GitHub
parent c6bb5b5603
commit 15b33ff064
6 changed files with 19 additions and 19 deletions

View File

@@ -546,7 +546,7 @@ class InductorAdaptor(CompilerInterface):
hash_str, example_inputs, True, False
)
assert inductor_compiled_graph is not None, (
-"Inductor cache lookup failed. Please remove"
+"Inductor cache lookup failed. Please remove "
f"the cache directory and try again." # noqa
)
elif torch.__version__ >= "2.6":
@@ -557,7 +557,7 @@ class InductorAdaptor(CompilerInterface):
hash_str, example_inputs, True, None, constants
)
assert inductor_compiled_graph is not None, (
-"Inductor cache lookup failed. Please remove"
+"Inductor cache lookup failed. Please remove "
f"the cache directory and try again." # noqa
)

View File

@@ -949,8 +949,8 @@ class CompilationConfig:
)
if self.cudagraph_mode == CUDAGraphMode.PIECEWISE:
logger.warning_once(
-"Piecewise compilation with empty splitting_ops do not"
-"contains piecewise cudagraph. Setting cudagraph_"
+"Piecewise compilation with empty splitting_ops does not "
+"contain piecewise cudagraph. Setting cudagraph_"
"mode to NONE. Hint: If you are using attention "
"backends that support cudagraph, consider manually "
"setting cudagraph_mode to FULL or FULL_DECODE_ONLY "
@@ -959,8 +959,8 @@ class CompilationConfig:
self.cudagraph_mode = CUDAGraphMode.NONE
elif self.cudagraph_mode == CUDAGraphMode.FULL_AND_PIECEWISE:
logger.warning_once(
-"Piecewise compilation with empty splitting_ops do "
-"not contains piecewise cudagraph. Setting "
+"Piecewise compilation with empty splitting_ops does "
+"not contain piecewise cudagraph. Setting "
"cudagraph_mode to FULL."
)
self.cudagraph_mode = CUDAGraphMode.FULL

View File

@@ -1494,7 +1494,7 @@ class ModelConfig:
if self.runner_type != "pooling" and head_dtype != self.dtype:
logger.warning_once(
-"`head_dtype` currently only supports pooling models."
+"`head_dtype` currently only supports pooling models, "
"fallback to model dtype [%s].",
self.dtype,
)

View File

@@ -672,9 +672,9 @@ class VllmConfig:
and self.compilation_config.mode != CompilationMode.VLLM_COMPILE
):
logger.warning(
-"Inductor compilation was disabled by user settings,"
-"Optimizations settings that are only active during"
-"Inductor compilation will be ignored."
+"Inductor compilation was disabled by user settings, "
+"optimizations settings that are only active during "
+"inductor compilation will be ignored."
)
def has_blocked_weights():
@@ -790,7 +790,7 @@ class VllmConfig:
logger.warning_once(
"--kv-sharing-fast-prefill requires changes on model side for "
-"correctness and to realize prefill savings. "
+"correctness and to realize prefill savings."
)
# TODO: Move after https://github.com/vllm-project/vllm/pull/26847 lands
self._set_compile_ranges()
@@ -813,7 +813,7 @@ class VllmConfig:
and not self.cache_config.enable_prefix_caching
):
logger.warning(
-"KV cache events are on, but prefix caching is not enabled."
+"KV cache events are on, but prefix caching is not enabled. "
"Use --enable-prefix-caching to enable."
)
if (
@@ -822,9 +822,9 @@ class VllmConfig:
and not self.kv_events_config.enable_kv_cache_events
):
logger.warning(
-"KV cache events are disabled,"
-"but the scheduler is configured to publish them."
-"Modify KVEventsConfig.enable_kv_cache_events"
+"KV cache events are disabled, "
+"but the scheduler is configured to publish them. "
+"Modify KVEventsConfig.enable_kv_cache_events "
"to True to enable."
)
current_platform.check_and_update_config(self)
@@ -893,7 +893,7 @@ class VllmConfig:
else "pipeline parallelism"
)
logger.warning_once(
-"Sequence parallelism not supported with"
+"Sequence parallelism not supported with "
"native rms_norm when using %s, "
"this will likely lead to an error.",
regime,
@@ -910,7 +910,7 @@ class VllmConfig:
logger.warning_once(
"No piecewise cudagraph for executing cascade attention."
" Will fall back to eager execution if a batch runs "
-"into cascade attentions"
+"into cascade attentions."
)
if self.compilation_config.cudagraph_mode.requires_piecewise_compilation():

View File

@@ -170,7 +170,7 @@ def load_lora_op_config(op_type: str, add_inputs: bool | None) -> dict | None:
config_path = Path(f"{user_defined_config_folder}/{config_fname}")
if not config_path.exists():
-logger.warning_once(f"No LoRA kernel configs founded in {config_path}")
+logger.warning_once(f"No LoRA kernel configs found in {config_path}")
return None
# Load json

View File

@@ -67,7 +67,7 @@ def get_flash_attn_version(requires_alibi: bool = False) -> int | None:
# 3. fallback for unsupported combinations
if device_capability.major == 10 and fa_version == 3:
logger.warning_once(
-"Cannot use FA version 3 on Blackwell platform "
+"Cannot use FA version 3 on Blackwell platform, "
"defaulting to FA version 2."
)
fa_version = 2