[Misc] improve warning/assert messages (#32226)

Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
2026-01-13 12:11:23 +09:00
parent c6bb5b5603
commit 15b33ff064
6 changed files with 19 additions and 19 deletions
--- a/vllm/compilation/compiler_interface.py
+++ b/vllm/compilation/compiler_interface.py
@@ -546,7 +546,7 @@ class InductorAdaptor(CompilerInterface):
                    hash_str, example_inputs, True, False
                )
                assert inductor_compiled_graph is not None, (
-                    "Inductor cache lookup failed. Please remove"
+                    "Inductor cache lookup failed. Please remove "
                    f"the cache directory and try again."  # noqa
                )
            elif torch.__version__ >= "2.6":
@@ -557,7 +557,7 @@ class InductorAdaptor(CompilerInterface):
                    hash_str, example_inputs, True, None, constants
                )
                assert inductor_compiled_graph is not None, (
-                    "Inductor cache lookup failed. Please remove"
+                    "Inductor cache lookup failed. Please remove "
                    f"the cache directory and try again."  # noqa
                )

--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -949,8 +949,8 @@ class CompilationConfig:
                    )
                if self.cudagraph_mode == CUDAGraphMode.PIECEWISE:
                    logger.warning_once(
-                        "Piecewise compilation with empty splitting_ops do not"
-                        "contains piecewise cudagraph. Setting cudagraph_"
+                        "Piecewise compilation with empty splitting_ops does not "
+                        "contain piecewise cudagraph. Setting cudagraph_"
                        "mode to NONE. Hint: If you are using attention "
                        "backends that support cudagraph, consider manually "
                        "setting cudagraph_mode to FULL or FULL_DECODE_ONLY "
@@ -959,8 +959,8 @@ class CompilationConfig:
                    self.cudagraph_mode = CUDAGraphMode.NONE
                elif self.cudagraph_mode == CUDAGraphMode.FULL_AND_PIECEWISE:
                    logger.warning_once(
-                        "Piecewise compilation with empty splitting_ops do "
-                        "not contains piecewise cudagraph. Setting "
+                        "Piecewise compilation with empty splitting_ops does "
+                        "not contain piecewise cudagraph. Setting "
                        "cudagraph_mode to FULL."
                    )
                    self.cudagraph_mode = CUDAGraphMode.FULL
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -1494,7 +1494,7 @@ class ModelConfig:

        if self.runner_type != "pooling" and head_dtype != self.dtype:
            logger.warning_once(
-                "`head_dtype` currently only supports pooling models."
+                "`head_dtype` currently only supports pooling models, "
                "fallback to model dtype [%s].",
                self.dtype,
            )
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -672,9 +672,9 @@ class VllmConfig:
            and self.compilation_config.mode != CompilationMode.VLLM_COMPILE
        ):
            logger.warning(
-                "Inductor compilation was disabled by user settings,"
-                "Optimizations settings that are only active during"
-                "Inductor compilation will be ignored."
+                "Inductor compilation was disabled by user settings, "
+                "optimizations settings that are only active during "
+                "inductor compilation will be ignored."
            )

        def has_blocked_weights():
@@ -790,7 +790,7 @@ class VllmConfig:

            logger.warning_once(
                "--kv-sharing-fast-prefill requires changes on model side for "
-                "correctness and to realize prefill savings. "
+                "correctness and to realize prefill savings."
            )
        # TODO: Move after https://github.com/vllm-project/vllm/pull/26847 lands
        self._set_compile_ranges()
@@ -813,7 +813,7 @@ class VllmConfig:
            and not self.cache_config.enable_prefix_caching
        ):
            logger.warning(
-                "KV cache events are on, but prefix caching is not enabled."
+                "KV cache events are on, but prefix caching is not enabled. "
                "Use --enable-prefix-caching to enable."
            )
        if (
@@ -822,9 +822,9 @@ class VllmConfig:
            and not self.kv_events_config.enable_kv_cache_events
        ):
            logger.warning(
-                "KV cache events are disabled,"
-                "but the scheduler is configured to publish them."
-                "Modify KVEventsConfig.enable_kv_cache_events"
+                "KV cache events are disabled, "
+                "but the scheduler is configured to publish them. "
+                "Modify KVEventsConfig.enable_kv_cache_events "
                "to True to enable."
            )
        current_platform.check_and_update_config(self)
@@ -893,7 +893,7 @@ class VllmConfig:
                        else "pipeline parallelism"
                    )
                    logger.warning_once(
-                        "Sequence parallelism not supported with"
+                        "Sequence parallelism not supported with "
                        "native rms_norm when using %s, "
                        "this will likely lead to an error.",
                        regime,
@@ -910,7 +910,7 @@ class VllmConfig:
                logger.warning_once(
                    "No piecewise cudagraph for executing cascade attention."
                    " Will fall back to eager execution if a batch runs "
-                    "into cascade attentions"
+                    "into cascade attentions."
                )

            if self.compilation_config.cudagraph_mode.requires_piecewise_compilation():
--- a/vllm/lora/ops/triton_ops/utils.py
+++ b/vllm/lora/ops/triton_ops/utils.py
@@ -170,7 +170,7 @@ def load_lora_op_config(op_type: str, add_inputs: bool | None) -> dict | None:

        config_path = Path(f"{user_defined_config_folder}/{config_fname}")
        if not config_path.exists():
-            logger.warning_once(f"No LoRA kernel configs founded in {config_path}")
+            logger.warning_once(f"No LoRA kernel configs found in {config_path}")
            return None

        # Load json
--- a/vllm/v1/attention/backends/fa_utils.py
+++ b/vllm/v1/attention/backends/fa_utils.py
@@ -67,7 +67,7 @@ def get_flash_attn_version(requires_alibi: bool = False) -> int | None:
        # 3. fallback for unsupported combinations
        if device_capability.major == 10 and fa_version == 3:
            logger.warning_once(
-                "Cannot use FA version 3 on Blackwell platform "
+                "Cannot use FA version 3 on Blackwell platform, "
                "defaulting to FA version 2."
            )
            fa_version = 2