[Frontend] Use new Renderer for Completions and Tokenize API (#32863)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-31 20:51:15 +08:00
committed by GitHub
parent 8980001c93
commit f0a1c8453a
64 changed files with 2116 additions and 2003 deletions

View File

@@ -22,7 +22,11 @@ def test_context_length_too_short(vllm_runner, image_assets, model):
with pytest.raises(ValueError, match="longer than the maximum model length"):
vllm_model = vllm_runner(
model,
max_model_len=128, # LLaVA has a feature size of 576
# LLaVA has a feature size of 576
# For the HF processor to execute successfully but still
# fail the overall context length check, we need the
# max_model_len to be at least large enough to contain all image tokens
max_model_len=579,
enforce_eager=True,
load_format="dummy",
)