Fix ExaoneMoeMTP test that never ran in Transformers v4 (#36792)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2026-03-11 17:10:23 +00:00
committed by GitHub
parent 196802dfa6
commit 5efa206a8c
4 changed files with 17 additions and 0 deletions

View File

@@ -136,6 +136,10 @@ def can_initialize(
if model_arch == "WhisperForConditionalGeneration":
m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
kwargs = {}
if not model_info.enable_prefix_caching:
kwargs["enable_prefix_caching"] = False
LLM(
model_info.default,
tokenizer=model_info.tokenizer,
@@ -165,6 +169,7 @@ def can_initialize(
hf_overrides=hf_overrides_fn,
max_num_seqs=model_info.max_num_seqs,
attention_config=attention_config,
**kwargs,
)