[Doc] Explicitly state that PP isn't compatible with speculative decoding yet (#10975)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-12-08 01:20:49 +08:00
committed by GitHub
parent 39e227c7ae
commit c889d5888b
8 changed files with 32 additions and 9 deletions

View File

@@ -247,9 +247,19 @@ def _compare_tp(
*,
method: Literal["generate", "encode"],
):
tp_size, pp_size, eager_mode, chunked_prefill = parallel_setup
multi_node_only, trust_remote_code, tokenizer_mode, \
load_format, hf_overrides = test_options
(
tp_size,
pp_size,
eager_mode,
chunked_prefill,
) = parallel_setup
(
multi_node_only,
trust_remote_code,
tokenizer_mode,
load_format,
hf_overrides,
) = test_options
if num_gpus_available < tp_size * pp_size:
pytest.skip(f"Need at least {tp_size} x {pp_size} GPUs")