[Multimodal][Speculative Decoding] Eagle3 multimodal support, enabled on Qwen3-VL (#29594)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: EanWang211123 <wangyiheng@sangfor.com.cn>
Co-authored-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
EanWang211123
2025-11-28 14:05:45 +08:00
committed by GitHub
parent c7ba1f6bc7
commit 37b15e97e8
5 changed files with 45 additions and 5 deletions

View File

@@ -283,6 +283,19 @@ def test_speculators_model_integration(
["model_setup", "mm_enabled", "enable_chunked_prefill"],
[
(("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False, False),
pytest.param(
(
"eagle3",
"Qwen/Qwen3-VL-8B-Instruct",
"taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3",
1,
),
False,
False,
marks=pytest.mark.skip(
reason="architecture of its eagle3 is LlamaForCausalLMEagle3"
),
),
pytest.param(
(
"eagle3",
@@ -352,6 +365,7 @@ def test_speculators_model_integration(
],
ids=[
"qwen3_eagle3",
"qwen3_vl_eagle3",
"qwen2_5_vl_eagle3",
"llama3_eagle",
"llama3_eagle3",