[Chore] Rename SchedulerConfig.chunked_prefill_enabled (#28735)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-11-15 02:39:57 +08:00
parent 67187554dd
commit e2741f6cbc
9 changed files with 21 additions and 19 deletions
--- a/tests/v1/e2e/test_spec_decode.py
+++ b/tests/v1/e2e/test_spec_decode.py
@@ -272,7 +272,7 @@ def test_speculators_model_integration(


@pytest.mark.parametrize(
-    ["model_setup", "mm_enabled", "chunked_prefill_enabled"],
+    ["model_setup", "mm_enabled", "enable_chunked_prefill"],
    [
        (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False, False),
        pytest.param(
@@ -358,7 +358,7 @@ def test_eagle_correctness(
    sampling_config: SamplingParams,
    model_setup: tuple[str, str, str, int],
    mm_enabled: bool,
-    chunked_prefill_enabled: bool,
+    enable_chunked_prefill: bool,
    attn_backend: str,
 ):
    if attn_backend == "TREE_ATTN":
@@ -396,9 +396,7 @@ def test_eagle_correctness(

        method, model_name, spec_model_name, tp_size = model_setup
        max_model_len = 2048
-        max_num_batched_tokens = max_model_len
-        if chunked_prefill_enabled:
-            max_num_batched_tokens = 128
+        max_num_batched_tokens = 128 if enable_chunked_prefill else max_model_len

        ref_llm = LLM(
            model=model_name, max_model_len=max_model_len, tensor_parallel_size=tp_size
@@ -420,7 +418,7 @@ def test_eagle_correctness(
            },
            max_model_len=max_model_len,
            max_num_batched_tokens=max_num_batched_tokens,
-            enable_chunked_prefill=chunked_prefill_enabled,
+            enable_chunked_prefill=enable_chunked_prefill,
        )
        spec_outputs = spec_llm.chat(test_prompts, sampling_config)
        matches = 0