[Chore] Rename SchedulerConfig.chunked_prefill_enabled (#28735)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -272,7 +272,7 @@ def test_speculators_model_integration(
 @pytest.mark.parametrize(
-    ["model_setup", "mm_enabled", "chunked_prefill_enabled"],
+    ["model_setup", "mm_enabled", "enable_chunked_prefill"],
     [
         (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False, False),
         pytest.param(
@@ -358,7 +358,7 @@ def test_eagle_correctness(
     sampling_config: SamplingParams,
     model_setup: tuple[str, str, str, int],
     mm_enabled: bool,
-    chunked_prefill_enabled: bool,
+    enable_chunked_prefill: bool,
     attn_backend: str,
 ):
     if attn_backend == "TREE_ATTN":
@@ -396,9 +396,7 @@ def test_eagle_correctness(

     method, model_name, spec_model_name, tp_size = model_setup
     max_model_len = 2048
-    max_num_batched_tokens = max_model_len
-    if chunked_prefill_enabled:
-        max_num_batched_tokens = 128
+    max_num_batched_tokens = 128 if enable_chunked_prefill else max_model_len

     ref_llm = LLM(
         model=model_name, max_model_len=max_model_len, tensor_parallel_size=tp_size
@@ -420,7 +418,7 @@ def test_eagle_correctness(
         },
         max_model_len=max_model_len,
         max_num_batched_tokens=max_num_batched_tokens,
-        enable_chunked_prefill=chunked_prefill_enabled,
+        enable_chunked_prefill=enable_chunked_prefill,
     )
     spec_outputs = spec_llm.chat(test_prompts, sampling_config)
     matches = 0
||||
Reference in New Issue
Block a user