[BugFix] Fix non detected failing tests (#30277)

Signed-off-by: ilmarkov <markovilya197@gmail.com>
2025-12-09 18:57:55 +01:00
parent 804e3468c0
commit 0b6a8a304c
6 changed files with 73 additions and 34 deletions
--- a/vllm/compilation/inductor_pass.py
+++ b/vllm/compilation/inductor_pass.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+from __future__ import annotations
+
 import functools
 import hashlib
 import inspect
@@ -8,15 +10,17 @@ import json
 import types
 from collections.abc import Callable
 from contextlib import contextmanager
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
 from torch import fx
 from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily

-from vllm.config.utils import Range
 from vllm.utils.torch_utils import is_torch_equal_or_newer

+if TYPE_CHECKING:
+    from vllm.config.utils import Range
+
 if is_torch_equal_or_newer("2.6"):
    from torch._inductor.custom_graph_pass import CustomGraphPass
 else:
--- a/vllm/compilation/piecewise_backend.py
+++ b/vllm/compilation/piecewise_backend.py
@@ -53,8 +53,27 @@ class PiecewiseBackend:
        self.is_last_graph = piecewise_compile_index == total_piecewise_compiles - 1

        self.is_full_graph = total_piecewise_compiles == 1
+        # TODO: we need to generalize encoder compilation to other models
+        self.is_encoder_compilation = vllm_backend.prefix in [
+            "Qwen2_5_VisionPatchEmbed",
+            "Qwen2_5_VisionPatchMerger",
+            "Qwen2_5_VisionBlock",
+        ]

        self.compile_ranges = self.compilation_config.get_compile_ranges()
+        if self.is_encoder_compilation:
+            # For encoder compilation we use the max int32 value
+            # to set the upper bound of the compile ranges
+            max_int32 = 2**31 - 1
+            last_compile_range = self.compile_ranges[-1]
+            assert (
+                last_compile_range.end
+                == vllm_config.scheduler_config.max_num_batched_tokens
+            )
+            self.compile_ranges[-1] = Range(
+                start=last_compile_range.start, end=max_int32
+            )
+
        log_string = f"PiecewiseBackend: compile_ranges: {self.compile_ranges}"
        logger.debug_once(log_string)