[BugFix] fix num_lookahead_slots missing in async executor (#4165)

Co-authored-by: Lei Wen <wenlei03@qiyi.com>
2024-05-01 01:12:59 +08:00
parent 26f2fb5113
commit 4bb53e2dde
9 changed files with 163 additions and 19 deletions
--- a/tests/spec_decode/e2e/test_compatibility.py
+++ b/tests/spec_decode/e2e/test_compatibility.py
@@ -42,10 +42,17 @@ def test_spec_decode_xfail_ray(test_llm_generator):
        temperature=temperature,
    )

-    with pytest.raises(AssertionError,
-                       match="Speculative decoding not yet supported for "):
-        get_output_from_llm_generator(test_llm_generator, prompts,
-                                      sampling_params)
+    try:
+        with pytest.raises(
+                AssertionError,
+                match="Speculative decoding not yet supported for "):
+            get_output_from_llm_generator(test_llm_generator, prompts,
+                                          sampling_params)
+    finally:
+        # Free up the Ray resources so that later tests can use
+        # the GPU we allocated here.
+        import ray
+        ray.shutdown()


@pytest.mark.parametrize(