[Bugfix] Fix for Spec model TP + Chunked Prefill (#10232)
Signed-off-by: andoorve <37849411+andoorve@users.noreply.github.com>
Signed-off-by: Sourashis Roy <sroy@roblox.com>
Co-authored-by: Sourashis Roy <sroy@roblox.com>
This commit is contained in:
committed by
GitHub
parent
1f6584ee85
commit
db66e018ea
@@ -1409,16 +1409,6 @@ class SpeculativeConfig:
|
||||
draft_hf_config
|
||||
)
|
||||
|
||||
if (enable_chunked_prefill and \
|
||||
speculative_draft_tensor_parallel_size != 1):
|
||||
# TODO - Investigate why the error reported in
|
||||
# https://github.com/vllm-project/vllm/pull/9291#issuecomment-2463266258
|
||||
# is happening and re-enable it.
|
||||
raise ValueError(
|
||||
"Chunked prefill and speculative decoding can be enabled "
|
||||
"simultaneously only for draft models with tensor "
|
||||
"parallel size 1.")
|
||||
|
||||
draft_model_config.max_model_len = (
|
||||
SpeculativeConfig._maybe_override_draft_max_model_len(
|
||||
speculative_max_model_len,
|
||||
|
||||
Reference in New Issue
Block a user