Avoid bytecode hook and simplify TorchCompileWrapperWithCustomDispatch (#25110)

Signed-off-by: Laith Sakka <lsakka@meta.com>
2025-11-14 14:11:10 -08:00
parent 5a84b76b86
commit 2e0ad629b0
10 changed files with 409 additions and 223 deletions
--- a/tests/models/multimodal/generation/test_qwen2_5_vl.py
+++ b/tests/models/multimodal/generation/test_qwen2_5_vl.py
@@ -34,6 +34,7 @@ VIDEO_PROMPTS = VIDEO_ASSETS.prompts(
@pytest.mark.parametrize("num_frames", [16])
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [128])
+@pytest.mark.parametrize("use_bytecode_hook", [True, False])
 def test_qwen2_5_vl_evs_functionality(
    vllm_runner,
    video_assets,
@@ -42,10 +43,14 @@ def test_qwen2_5_vl_evs_functionality(
    num_frames: int,
    dtype: str,
    max_tokens: int,
+    use_bytecode_hook: bool,
+    monkeypatch,
 ) -> None:
    """Test EVS (Efficient Video Sampling) functionality with different
    pruning rates.
    """
+    # Set VLLM_USE_BYTECODE_HOOK from the use_bytecode_hook parameter ("1" or "0")
+    monkeypatch.setenv("VLLM_USE_BYTECODE_HOOK", "1" if use_bytecode_hook else "0")

    # Sample frames from video assets
    sampled_vids = [
@@ -86,6 +91,7 @@ def test_qwen2_5_vl_evs_functionality(
@pytest.mark.parametrize("num_frames", [16])
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [128])
+@pytest.mark.parametrize("use_bytecode_hook", [True, False])
 def test_qwen2_5_vl_evs_batched_videos(
    vllm_runner,
    video_assets,
@@ -94,6 +100,8 @@ def test_qwen2_5_vl_evs_batched_videos(
    num_frames: int,
    dtype: str,
    max_tokens: int,
+    use_bytecode_hook: bool,
+    monkeypatch,
 ) -> None:
    """Test EVS functionality with batched videos.

@@ -102,6 +110,8 @@ def test_qwen2_5_vl_evs_batched_videos(
    2. Both pruning configurations work with multiple videos
    3. The model doesn't crash when processing multiple videos simultaneously
    """
+    # Set VLLM_USE_BYTECODE_HOOK from the use_bytecode_hook parameter ("1" or "0")
+    monkeypatch.setenv("VLLM_USE_BYTECODE_HOOK", "1" if use_bytecode_hook else "0")
    # Sample frames from video assets
    sampled_vids = [
        sample_frames_from_video(asset.np_ndarrays, num_frames)