Remove unused EVS functions in qwen3_vl.py (#37183)

Signed-off-by: Tianyu Guo <guoty9@mail2.sysu.edu.cn>
2026-03-16 21:09:09 +08:00
parent ffbc2e5bdb
commit 43a73f853b
1 changed file with 0 additions and 101 deletions
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -1959,107 +1959,6 @@ class Qwen3VLForConditionalGeneration(
            else:
                raise ValueError(f"Unsupported modality: {mm_feature.modality}")

-    def _get_evs_mask_segments(
-        self, mm_position: PlaceholderRange, expected_frames: int
-    ) -> list[torch.Tensor] | None:
-        """Extract contiguous segments from EVS is_embed mask.
-
-        The EVS (Efficient Video Sampling) mask marks which placeholder
-        positions should be filled with video embeddings. This method splits
-        the mask into contiguous segments, where each segment represents one
-        retained frame.
-
-        This is a pure function - it does not modify any state and always
-        returns the same output for the same input (idempotent).
-
-        Args:
-            mm_position: MultiModal position containing the is_embed mask
-            expected_frames: Expected number of frame segments
-
-        Returns:
-            List of tensors, each containing indices for one frame segment,
-            or None if EVS is not enabled or validation fails.
-        """
-        is_embed_mask = getattr(mm_position, "is_embed", None)
-        if is_embed_mask is None:
-            return None
-
-        # Find all True positions in the mask
-        mask_tensor = torch.as_tensor(is_embed_mask, dtype=torch.bool).view(-1)
-        true_indices = torch.nonzero(mask_tensor, as_tuple=False).flatten()
-        if true_indices.numel() == 0:
-            return None
-
-        # Split into contiguous segments (where diff > 1 indicates a gap)
-        if true_indices.numel() == 1:
-            segments = [true_indices]
-        else:
-            diffs = torch.diff(true_indices)
-            split_points = torch.nonzero(diffs != 1, as_tuple=False).flatten()
-            if split_points.numel() == 0:
-                segments = [true_indices]
-            else:
-                segments = torch.tensor_split(
-                    true_indices, split_points.add(1).tolist()
-                )
-
-        # Validate segment count matches expected frames
-        if len(segments) < expected_frames:
-            logger.debug(
-                "EVS mask segments (%d) do not match expected frames (%d)",
-                len(segments),
-                expected_frames,
-            )
-            return None
-
-        return segments[:expected_frames]
-
-    def _extract_frame_offsets_from_mask(
-        self, mm_position: PlaceholderRange, expected_frames: int
-    ) -> list[int] | None:
-        """Return relative offsets for each EVS-retained frame.
-
-        The prompt processor stores a boolean mask inside ``mm_position`` that
-        marks which placeholder locations should be populated with video
-        embeddings. By splitting that mask into contiguous runs we can recover
-        the start of every retained frame without probing ``input_tokens``.
-
-        Args:
-            mm_position: MultiModal position containing the is_embed mask
-            expected_frames: Expected number of frames
-
-        Returns:
-            List of starting offsets (relative to mm_position) for each frame,
-            or None if EVS is not enabled.
-        """
-        segments = self._get_evs_mask_segments(mm_position, expected_frames)
-        if segments is None:
-            return None
-
-        return [int(segment[0].item()) for segment in segments]
-
-    def _get_actual_frame_token_counts(
-        self, mm_position: PlaceholderRange, expected_frames: int
-    ) -> list[int] | None:
-        """Return actual token count for each EVS-retained frame.
-
-        This function calculates the actual number of tokens per frame by
-        analyzing the is_embed mask, accounting for EVS pruning. Each frame
-        may have a different token count due to content-aware pruning.
-
-        Args:
-            mm_position: MultiModal position containing the is_embed mask
-            expected_frames: Expected number of frames
-
-        Returns:
-            List of token counts for each frame, or None if EVS is not enabled.
-        """
-        segments = self._get_evs_mask_segments(mm_position, expected_frames)
-        if segments is None:
-            return None
-
-        return [len(seg) for seg in segments]
-
    def get_mrope_input_positions(
        self,
        input_tokens: list[int],