[Doc] Explicitly state that PP isn't compatible with speculative decoding yet (#10975)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2024-12-08 01:20:49 +08:00
parent 39e227c7ae
commit c889d5888b
8 changed files with 32 additions and 9 deletions
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -247,9 +247,19 @@ def _compare_tp(
    *,
    method: Literal["generate", "encode"],
 ):
-    tp_size, pp_size, eager_mode, chunked_prefill = parallel_setup
-    multi_node_only, trust_remote_code, tokenizer_mode, \
-        load_format, hf_overrides = test_options
+    (
+        tp_size,
+        pp_size,
+        eager_mode,
+        chunked_prefill,
+    ) = parallel_setup
+    (
+        multi_node_only,
+        trust_remote_code,
+        tokenizer_mode,
+        load_format,
+        hf_overrides,
+    ) = test_options

    if num_gpus_available < tp_size * pp_size:
        pytest.skip(f"Need at least {tp_size} x {pp_size} GPUs")