tests/v1/e2e/spec_decode: assert async scheduling is used (#39206)
Signed-off-by: Rishi Puri <riship@nvidia.com>
Signed-off-by: Rishi Puri <puririshi98@berkeley.edu>
Signed-off-by: sfeng33 <4florafeng@gmail.com>
Co-authored-by: Benjamin Chislett <chislett.ben@gmail.com>
Co-authored-by: Flora Feng <4florafeng@gmail.com>
This commit is contained in:
@@ -116,6 +116,11 @@ def test_no_sync_with_spec_decode(
|
||||
async_scheduling=True,
|
||||
)
|
||||
|
||||
# Assert async scheduling is actually active before running inference.
|
||||
assert llm.llm_engine.vllm_config.scheduler_config.async_scheduling, (
|
||||
f"Expected async_scheduling=True for spec decode, got False. method={method}"
|
||||
)
|
||||
|
||||
outputs = llm.generate(
|
||||
["Hello, my name is"],
|
||||
SamplingParams(temperature=0, max_tokens=10),
|
||||
|
||||
Reference in New Issue
Block a user