[Dynamic Spec Decoding] Minor fix for disabling speculative decoding (#5000)

Lily Liu
2024-05-25 10:00:14 -07:00
committed by GitHub
parent 325c119961
commit d5a1697772
3 changed files with 63 additions and 11 deletions
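
Context for the diff below: with dynamic speculative decoding, the worker is expected to switch speculation off once the running queue grows past disable_by_batch_size and fall back to its non-speculative path, so the test's old blanket expectation that execute_model always raises the planted exception no longer holds for every parametrization. In spirit, the gate reduces to the sketch below (a hedged illustration, not vLLM's actual implementation: the helper name and the threshold value 2 are hypothetical, and the strict '>' comparison mirrors the test's own check):

def proposal_len_for_step(num_lookahead_slots: int,
                          running_queue_size: int,
                          disable_by_batch_size: int) -> int:
    # Dynamic gate (sketch): once the running queue exceeds the
    # configured threshold, propose zero speculative tokens so the
    # worker takes its non-speculative path (_run_no_spec below).
    if running_queue_size > disable_by_batch_size:
        return 0
    return num_lookahead_slots

# Illustration with the test's queue_size=4 and k=1 and an assumed
# threshold of 2: speculation is disabled, so zero tokens.
assert proposal_len_for_step(1, running_queue_size=4,
                             disable_by_batch_size=2) == 0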


@@ -1,4 +1,4 @@
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 
 import pytest
 import torch
@@ -13,9 +13,9 @@ from vllm.spec_decode.top1_proposer import Top1Proposer
 from .utils import create_batch, mock_worker
 
 
-@pytest.mark.parametrize('queue_size', [2, 4])
-@pytest.mark.parametrize('batch_size', [1, 2, 3, 6])
-@pytest.mark.parametrize('k', [1, 2, 5, 7, 10])
+@pytest.mark.parametrize('queue_size', [4])
+@pytest.mark.parametrize('batch_size', [1])
+@pytest.mark.parametrize('k', [1])
 @torch.inference_mode()
 def test_disable_spec_tokens(queue_size: int, batch_size: int, k: int):
     """Verify that speculative tokens are disabled when the batch size
@@ -42,8 +42,12 @@ def test_disable_spec_tokens(queue_size: int, batch_size: int, k: int):
         num_lookahead_slots=k,
         running_queue_size=queue_size)
 
-    with pytest.raises(ValueError, match=exception_secret):
-        worker.execute_model(execute_model_req=execute_model_req)
+    if queue_size > disable_by_batch_size:
+        with patch.object(worker,
+                          '_run_no_spec',
+                          side_effect=ValueError(exception_secret)), \
+             pytest.raises(ValueError, match=exception_secret):
+            worker.execute_model(execute_model_req=execute_model_req)
 
     # When the batch size is larger than the threshold,
     # we expect no speculative tokens (0).
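
The patch.object pattern introduced above is the interesting part: planting a unique exception as the side_effect of the method that should run, then matching it with pytest.raises, proves that exactly that code path was taken without executing the real worker internals. A minimal self-contained sketch of the same pattern (hypothetical Worker class, not part of this commit):

from unittest.mock import patch

import pytest


class Worker:

    def _run_no_spec(self):
        pass

    def execute_model(self):
        # In the real worker this branch is taken only when
        # speculation has been dynamically disabled.
        return self._run_no_spec()


def test_no_spec_path_is_taken():
    worker = Worker()
    # Unique marker so pytest.raises matches only the exception we
    # planted, never an unrelated failure.
    exception_secret = 'artificial stop'
    with patch.object(worker,
                      '_run_no_spec',
                      side_effect=ValueError(exception_secret)), \
         pytest.raises(ValueError, match=exception_secret):
        worker.execute_model()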