[v1] Add PrefixLM support to FlexAttention backend (#27938)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2025-12-07 23:51:36 +08:00
committed by GitHub
parent 541a2ef892
commit b952f4d3c3
16 changed files with 173 additions and 25 deletions

View File

@@ -382,7 +382,6 @@ VLM_TEST_SETTINGS = {
         auto_cls=AutoModelForImageTextToText,
         vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
         patch_hf_runner=model_utils.gemma3_patch_hf_runner,
-        num_logprobs=10,
     ),
"glm4v": VLMTestInfo(
models=["zai-org/glm-4v-9b"],