[Frontend] Use new Renderer for Completions and Tokenize API (#32863)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-31 20:51:15 +08:00
committed by GitHub
parent 8980001c93
commit f0a1c8453a
64 changed files with 2116 additions and 2003 deletions

View File

@@ -22,7 +22,11 @@ def test_context_length_too_short(vllm_runner, image_assets, model):
with pytest.raises(ValueError, match="longer than the maximum model length"):
vllm_model = vllm_runner(
model,
max_model_len=128, # LLaVA has a feature size of 576
# LLaVA has a feature size of 576
# For the HF processor to execute successfully but still
# fail the overall context length check, we need the
# max_model_len to be at least large enough to contain all image tokens
max_model_len=579,
enforce_eager=True,
load_format="dummy",
)