[Core] Multi-Step + Single Step Prefills via Chunked Prefill code path (#8378)
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
committed by
GitHub
parent
c5d55356f9
commit
c2ec430ab5
@@ -980,9 +980,13 @@ class EngineArgs:
|
||||
if speculative_config is not None:
|
||||
raise ValueError("Speculative decoding is not supported with "
|
||||
"multi-step (--num-scheduler-steps > 1)")
|
||||
if self.enable_chunked_prefill:
|
||||
raise ValueError("Chunked prefill is not supported with "
|
||||
"multi-step (--num-scheduler-steps > 1)")
|
||||
if self.enable_chunked_prefill and self.enable_prefix_caching:
|
||||
raise ValueError("Multi-Step is not supported with "
|
||||
"both Chunked-Prefill and Prefix-Caching "
|
||||
"enabled together.")
|
||||
if self.enable_chunked_prefill and self.pipeline_parallel_size > 1:
|
||||
raise ValueError("Multi-Step Chunked-Prefill is not supported "
|
||||
"for pipeline-parallel-size > 1")
|
||||
|
||||
# make sure num_lookahead_slots is set to the higher value depending on
|
||||
# whether we are using speculative decoding or multi-step
|
||||
|
||||
Reference in New Issue
Block a user