[V1][Usage] Refactor speculative decoding configuration and tests (#14434)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
2025-03-23 13:28:10 +08:00
parent 0661cfef7a
commit 50c9636d87
20 changed files with 1055 additions and 802 deletions
--- a/tests/v1/e2e/test_ngram_spec_decode.py
+++ b/tests/v1/e2e/test_ngram_spec_decode.py
@@ -70,12 +70,16 @@ def test_ngram_correctness(
        ref_outputs = ref_llm.chat(test_prompts, sampling_config)
        del ref_llm

-        spec_llm = LLM(model=model_name,
-                       speculative_model='[ngram]',
-                       ngram_prompt_lookup_max=5,
-                       ngram_prompt_lookup_min=3,
-                       num_speculative_tokens=3,
-                       max_model_len=1024)
+        spec_llm = LLM(
+            model=model_name,
+            speculative_config={
+                "method": "ngram",
+                "prompt_lookup_max": 5,
+                "prompt_lookup_min": 3,
+                "num_speculative_tokens": 3,
+            },
+            max_model_len=1024,
+        )
        spec_outputs = spec_llm.chat(test_prompts, sampling_config)
        matches = 0
        misses = 0